# --- SET PARAMETERS FOR R MARKDOWN AND THE GENERAL RUNNING OF THE CODE --- #
set.seed(420)
knitr::opts_chunk$set(message = FALSE,
cache = FALSE,
autodep = FALSE)
start_time = Sys.time()
recompute_lengthy_analyses = FALSE
recompute_metaeco_analysis = TRUE
# --- SET PARAMETERS NECESSARY TO USE THE PACKAGE BEMOVI (INCLUDES SAMPLING PARAMETERS) --- #
# Define time points and associated sampling days
time_points = 0:4
time_points_for_ES = 1:4 #Time points for which the effect size was calculated
sampling_days = c(0, 5, 12, 19, 26)
# Define parameters used for analysing the videos in the package BEMOVI
fps = 25 # Frames per second (frame rate of the video)
seconds_per_video = 5 # Duration of each video in seconds
total_frames = fps * seconds_per_video # Total number of frames in each video
measured_volume = 34.4 # Volume measured for each video in microliters (µL)
volume_recorded_μl = measured_volume # Storing the measured volume in µL
volume_recorded_ml = volume_recorded_μl * 10**-3 # Converting the volume from µL to milliliters (mL)
pixel_to_scale = 4.05 # Conversion factor for pixels to µm
filter_min_net_disp = 25 # Minimum net displacement for an object to be considered a protist (pixels)
filter_min_duration = 1 # Minimum duration an object must be tracked to be considered a protist (seconds)
filter_detection_freq = 0.1 # Minimum frequency of detection in the video for an object to be considered a protist (detected frames / total frames)
filter_median_step_length = 3 # Minimum step length for an object to be considered a protist (pixels)
threshold_levels = c(13, 40) # Two lower pixel intensity thresholds for ImageJ to distinguish background from protists (higher threshold used for Ble)
video.description.folder = "0_video_description/" # Folder where the video descriptions (metadata about each video) are stored
video.description.file = "video_description.txt" # Filename of the text file containing the description of each video
merged_data_folder = "5_merged_data/" # Folder where the merged data files from the analysis will be stored
# Define columns that will be used for identifying species based on their movement and morphological characteristics
columns_for_species_ID = c(
"mean_grey", # Mean grayscale value (intensity) of the detected protist (brightness)
"sd_grey", # Standard deviation of the grayscale values (intensity variation)
"mean_area", # Mean area (in pixels) of the detected protist
"sd_area", # Standard deviation of the protist's area
"mean_perimeter", # Mean perimeter (length around the protist) of the detected protist
"mean_turning", # Mean turning angle of the protist's trajectory (how much the protist changes direction)
"sd_turning", # Standard deviation of the turning angles (variability in direction change)
"sd_perimeter", # Standard deviation of the protist's perimeter
"mean_major", # Mean length of the major axis of the protist (longest dimension)
"sd_major", # Standard deviation of the major axis length
"mean_minor", # Mean length of the minor axis of the protist (shortest dimension)
"sd_minor", # Standard deviation of the minor axis length
"mean_ar", # Mean aspect ratio (ratio of major to minor axis length)
"sd_ar", # Standard deviation of the aspect ratio
"duration", # Duration of time the protist is tracked in the video
"max_net", # Maximum net displacement (greatest straight-line distance traveled)
"net_disp", # Total net displacement (total straight-line distance from start to end of the trajectory)
"net_speed", # Mean net speed (speed based on net displacement)
"gross_disp", # Total gross displacement (total distance traveled including all movement)
"max_step", # Maximum step length (largest distance moved between two consecutive frames)
"min_step", # Minimum step length (smallest distance moved between two consecutive frames)
"sd_step", # Standard deviation of the step lengths (variability in frame-to-frame movement)
"sd_gross_speed", # Standard deviation of gross speed (variability in speed over time)
"max_gross_speed", # Maximum gross speed (highest speed achieved)
"min_gross_speed") # Minimum gross speed (lowest speed detected)
# --- SET PARAMETERS RELATED TO RESOURCE FLOWS --- #
resource_flow_days = c(7, 14, 21)
# --- SET PARAMETERS RELATED TO PROTISTS --- #
protist_species = c("Ble",
"Cep",
"Col",
"Eug",
"Eup",
"Lox",
"Pau",
"Pca",
"Spi",
"Spi_te",
"Tet")
protist_species_indiv_per_ml = paste0(protist_species, "_indiv_per_ml")
protist_species_bioarea_mm2_per_ml = paste0(protist_species, "_bioarea_mm2_per_ml")
protist_species_dominance = paste0(protist_species_bioarea_mm2_per_ml, "_dominance")
# --- SET PARAMETERS RELATED TO INDIVIDUALS --- #
# Initialise the dataset "ds_individuals"
ds_individuals = list()
ds_individuals_t0_extending = list()
# --- SET PARAMETERS RELATED TO ECOSYSTEMS --- #
# Import the information about ecosystems
ecosystem_info = read.csv(here("..",
"1_data",
"ecosystem_info.csv"), header = TRUE) %>%
rename(ecosystem_ID = culture_ID,
ecosystem_type = eco_metaeco_type,
ecosystem_size = patch_size,
ecosystem_size_ml = patch_size_ml) %>%
mutate(
# To have levels that are easier to read, transform them.
ecosystem_type = case_when(
ecosystem_type == "Sa" ~ "small autotrophic unconnected",
ecosystem_type == "Sh" ~ "small heterotrophic unconnected",
ecosystem_type == "Ma" ~ "medium autotrophic unconnected",
ecosystem_type == "Mh" ~ "medium heterotrophic unconnected",
ecosystem_type == "La" ~ "large autotrophic unconnected",
ecosystem_type == "Lh" ~ "large heterotrophic unconnected",
ecosystem_type == "Sa (Sa_Lh)" ~ "small autotrophic connected",
ecosystem_type == "Sh (Sh_La)" ~ "small heterotrophic connected",
ecosystem_type == "Ma (Ma_Mh)" ~ "medium autotrophic connected",
ecosystem_type == "Mh (Ma_Mh)" ~ "medium heterotrophic connected",
ecosystem_type == "La (Sh_La)" ~ "large autotrophic connected",
ecosystem_type == "Lh (Sa_Lh)" ~ "large heterotrophic connected",
TRUE ~ ecosystem_type),
ecosystem_type = factor(
ecosystem_type,
levels = c("small heterotrophic unconnected",
"small heterotrophic connected",
"small autotrophic unconnected",
"small autotrophic connected",
"medium heterotrophic unconnected",
"medium heterotrophic connected",
"medium autotrophic unconnected",
"medium autotrophic connected",
"large heterotrophic unconnected",
"large heterotrophic connected",
"large autotrophic unconnected",
"large autotrophic connected")),
ecosystem_size = case_when(
ecosystem_size == "S" ~ "small",
ecosystem_size == "M" ~ "medium",
ecosystem_size == "L" ~ "large",
TRUE ~ ecosystem_size),
ecosystem_size_and_trophy = factor(
x = paste(ecosystem_size, trophic_type),
levels = c("small autotrophic",
"medium autotrophic",
"large autotrophic",
"small heterotrophic",
"medium heterotrophic",
"large heterotrophic")),
connection = factor(
x = ifelse(metaecosystem == "yes",
"connected",
"unconnected"),
levels = c("unconnected",
"connected")),
metaecosystem = NULL,
metaecosystem_type = case_when(
ecosystem_size_and_trophy == "large heterotrophic" ~ "heterotrophic-dominated",
ecosystem_size_and_trophy == "small autotrophic" ~ "heterotrophic-dominated",
ecosystem_size_and_trophy == "large autotrophic" ~ "autotrophic-dominated",
ecosystem_size_and_trophy == "small heterotrophic" ~ "autotrophic-dominated",
ecosystem_size_and_trophy == "medium autotrophic" ~ "equally-dominated",
ecosystem_size_and_trophy == "medium heterotrophic" ~ "equally-dominated",
TRUE ~ metaecosystem_type),
metaecosystem_type = factor(metaecosystem_type,
levels = c("autotrophic-dominated",
"equally-dominated",
"heterotrophic-dominated")),
metaeco_type_n_connection = if_else(
!is.na(metaecosystem_type), # Check if metaecosystem_type is not NA
paste(metaecosystem_type, connection), # Combine if not NA
NA)) # Return NA if metaecosystem_type is NA
# Initialise the dataset "ds_ecosystems_effect_size"
ds_ecosystems_effect_size = list()
# Define ecosystems to take off because of problems in the experiment
ecosystems_to_take_off = NULL
# Define the variables you want to calculate for each ecosystem
variables_ecosystems = c("bioarea_mm2_per_ml",
"log10_bioarea_mm2_per_ml",
"ln_bioarea_mm2_per_ml",
"sqrt_bioarea_mm2_per_ml",
"cbrt_bioarea_mm2_per_ml",
"sqr_bioarea_mm2_per_ml",
"bioarea_tot_mm2",
"indiv_per_ml",
"species_richness",
"shannon",
"evenness_pielou",
protist_species_indiv_per_ml,
protist_species_bioarea_mm2_per_ml,
protist_species_dominance,
"median_body_size_µm2")
# Find the number of ecosystems in the experiment
n_ecosystems = max(ecosystem_info$ecosystem_ID)
# Define the treatments and controls
treatments_and_controls = data.frame(treatment = c("small autotrophic connected",
"small heterotrophic connected",
"medium autotrophic connected",
"medium heterotrophic connected",
"large autotrophic connected",
"large heterotrophic connected"),
control = c("small autotrophic unconnected",
"small heterotrophic unconnected",
"medium autotrophic unconnected",
"medium heterotrophic unconnected",
"large autotrophic unconnected",
"large heterotrophic unconnected"))
n_treatments = length(unique(treatments_and_controls$treatment))
n_controls = length(unique(treatments_and_controls$control))
# Find the ID of autotrophic and heterotrophic ecosystems
ecosystem_IDs_autotrophic = ecosystem_info %>%
filter(trophic_type == "autotrophic") %>%
pull(ecosystem_ID)
ecosystem_IDs_heterotrophic = ecosystem_info %>%
filter(trophic_type == "heterotrophic") %>%
pull(ecosystem_ID)
# --- SET PARAMETERS RELATED TO META-ECOSYSTEMS --- #
# Initialise the "ds_metaecosystems" dataset
ds_metaecosystems = list()
# Define meta-ecosystems to take off because of problems in the experiment
metaecosystems_to_take_off = NULL
# Define the variables you want to calculate for each meta-ecosystem
variables_metaecos = c("total_metaecosystem_bioarea_mm2")
# Find the number of meta-ecosystems in the experiment
system_nr_metaecosystems = ecosystem_info %>%
filter(connection == "connected") %>%
pull(system_nr) %>%
unique
n_metaecosystems = length(system_nr_metaecosystems)
# Define the treatments and controls
treatments_and_controls_metaecos = data.frame(treatment = c("heterotrophic-dominated connected",
"autotrophic-dominated connected",
"equally-dominated connected"),
control = c("heterotrophic-dominated unconnected",
"autotrophic-dominated unconnected",
"equally-dominated unconnected"))
n_treatments_metaecos = length(unique(treatments_and_controls_metaecos$treatment))
n_controls_metaecos = length(unique(treatments_and_controls_metaecos$control))
# --- SET PARAMETERS FOR PLOTTING --- #
# Set line colours
treatment_colours = c("autotrophic-dominated" = "#5ab4ac",
"equally-dominated" = "#969696",
"heterotrophic-dominated" = "#d8b365",
"heterotrophic-dominated connected" = "#CB4335",
"autotrophic-dominated connected" = "#6C3483",
"equally-dominated connected" = "black",
"heterotrophic-dominated unconnected" = "#CB4335",
"autotrophic-dominated unconnected" = "#6C3483",
"equally-dominated unconnected" = "black",
"small heterotrophic" = "#c6dbef",
"medium heterotrophic" = "#3182bd",
"large heterotrophic" = "#08306b",
"small autotrophic" = "#c7e9c0",
"medium autotrophic" = "#2ca25f",
"large autotrophic" = "#00441b",
"small heterotrophic unconnected" = "#deebf7",
"small heterotrophic connected" = "#deebf7",
"medium heterotrophic unconnected" = "#9ecae1",
"medium heterotrophic connected" = "#9ecae1",
"large heterotrophic unconnected" = "#3182bd",
"large heterotrophic connected" = "#3182bd",
"small autotrophic unconnected" = "#ccece6",
"small autotrophic connected" = "#ccece6",
"medium autotrophic unconnected" = "#99d8c9",
"medium autotrophic connected" = "#99d8c9",
"large autotrophic unconnected" = "#2ca25f",
"large autotrophic connected" = "#2ca25f")
# Set line types
treatment_linetypes = c("connected" = "solid",
"unconnected" = "longdash")
# Set figure height and width in the RMD output
figures_width_rmd_output = 10
figures_height_rmd_output = 7
# Set parameters legend
legend_position = "top"
legend_width_cm = 2
size_legend = 12
# Set parameters boxplots
boxplot_width = 2
# Set parameters points & error bars
treatment_points_size = 2.5
width_errorbar = 0.2
dodging_error_bar = 0.5
dodging = 0.5
# Set parameters lines
treatment_lines_linewidth = 1
# Set parameters resource flow line (vertical line indicating the days of the resource flow)
resource_flow_line_type = "solid"
resource_flow_line_colour = "#d9d9d9"
resource_flow_line_width = 0.3
# Set parameters of the horizontal line crossing zero
zero_line_colour = "grey"
zero_line_line_type = "dotted"
zero_line_line_width = 0.5
zero_line_ES_line_type = "dotted"
zero_line_ES_colour = "grey"
zero_line_ES_line_width = 1
# Set parameters of the package "ggarrange" which combines multiple ggplots
ggarrange_margin_top = 0
ggarrange_margin_bottom = 0
ggarrange_margin_left = 0
ggarrange_margin_right = 0
# Set parameters of the figures saved for the paper
cm_to_inches = 0.393701
paper_width_cm = 17.3
paper_height_cm = 20
paper_width_inches = paper_width_cm * cm_to_inches
paper_height_inches = paper_height_cm * cm_to_inches
paper_units = "cm"
paper_res = 600
paper_labels_size = 12
paper_format = "pdf"
# Set parameters of the figures saved for presentations
presentation_figure_size_cm = 15
presentation_figure_width_cm = 30
presentation_figure_height_cm = 22
presentation_figure_size_inches = presentation_figure_size_cm * cm_to_inches
presentation_figure_width_inches = presentation_figure_width_cm * cm_to_inches
presentation_figure_height_inches = presentation_figure_height_cm * cm_to_inches
presentation_labels_size = 24
presentation_x_axis_size = 20
presentation_y_axis_size = presentation_x_axis_size
presentation_treatment_points_size = 5
presentation_treatment_linewidth = 2
# Set parameters for the grey background used to show the time points excluded from the analysis
grey_background_xmin = -Inf
grey_background_xmax = 11.5
grey_background_ymin = -Inf
grey_background_ymax = Inf
grey_background_fill = "#f0f0f0"
grey_background_alpha = 0.03
grey_background_color = "transparent"
# Set parameters axes
size_x_axis = 16
size_y_axis = 14
axis_names = tibble(variable = character(), axis_name = character()) %>%
add_row(variable = "day", axis_name = "Time (day)") %>%
add_row(variable = "ecosystem_size_ml", axis_name = "Ecosystem Size (ml)") %>%
add_row(variable = "total_metaecosystem_bioarea_mm2", axis_name = "Total Biomass (mm²)") %>%
add_row(variable = "bioarea_mm2_per_ml", axis_name = "Biomass Density (mm²/ml)") %>%
add_row(variable = "bioarea_mm2_per_ml_d", axis_name = "Effect Size of Bioarea Density") %>%
add_row(variable = "log10_bioarea_mm2_per_ml", axis_name = "Log10 Biomass (mm²/ml)") %>%
add_row(variable = "ln_bioarea_mm2_per_ml", axis_name = "Ln Biomass (mm²/ml)") %>%
add_row(variable = "sqrt_bioarea_mm2_per_ml", axis_name = "Sqrt Biomass (mm²/ml)") %>%
add_row(variable = "indiv_per_ml", axis_name = "Abundance (ind/ml)") %>%
add_row(variable = "indiv_per_ml_d", axis_name = "Effect Size of Abundance") %>%
add_row(variable = "species_richness", axis_name = "Species Richness") %>%
add_row(variable = "species_richness_d", axis_name = "Effect Size of Species Richness") %>%
add_row(variable = "shannon", axis_name = "Shannon Index") %>%
add_row(variable = "shannon_d", axis_name = "Effect Size of Shannon Index") %>%
add_row(variable = "evenness_pielou", axis_name = "Evenness (Pielou's Index)") %>%
add_row(variable = "evenness_pielou_d", axis_name = "Effect Size of Evenness") %>%
add_row(variable = "median_body_size_µm2", axis_name = "Median Body Size (µm²)") %>%
add_row(variable = "median_body_size_µm2_d", axis_name = "Effect Size of Median Body Size") %>%
add_row(variable = "Ble_indiv_per_ml", axis_name = "Ble Density (ind/ml)") %>%
add_row(variable = "Cep_indiv_per_ml", axis_name = "Cep Density (ind/ml)") %>%
add_row(variable = "Col_indiv_per_ml", axis_name = "Col Density (ind/ml)") %>%
add_row(variable = "Eug_indiv_per_ml", axis_name = "Eug Density (ind/ml)") %>%
add_row(variable = "Eup_indiv_per_ml", axis_name = "Eup Density (ind/ml)") %>%
add_row(variable = "Lox_indiv_per_ml", axis_name = "Lox Density (ind/ml)") %>%
add_row(variable = "Pau_indiv_per_ml", axis_name = "Pau Density (ind/ml)") %>%
add_row(variable = "Pca_indiv_per_ml", axis_name = "Pca Density (ind/ml)") %>%
add_row(variable = "Spi_indiv_per_ml", axis_name = "Spi Density (ind/ml)") %>%
add_row(variable = "Spi_te_indiv_per_ml", axis_name = "Spi te Density (ind/ml)") %>%
add_row(variable = "Tet_indiv_per_ml", axis_name = "Tet Density (ind/ml)") %>%
add_row(variable = "dominance", axis_name = "Dominance (%)") %>%
add_row(variable = "Ble_indiv_per_ml_dominance", axis_name = "Ble Dominance (%)") %>%
add_row(variable = "Cep_indiv_per_ml_dominance", axis_name = "Cep Dominance (%)") %>%
add_row(variable = "Col_indiv_per_ml_dominance", axis_name = "Col Dominance (%)") %>%
add_row(variable = "Eug_indiv_per_ml_dominance", axis_name = "Eug Dominance (%)") %>%
add_row(variable = "Eup_indiv_per_ml_dominance", axis_name = "Eup Dominance (%)") %>%
add_row(variable = "Lox_indiv_per_ml_dominance", axis_name = "Lox Dominance (%)") %>%
add_row(variable = "Pau_indiv_per_ml_dominance", axis_name = "Pau Dominance (%)") %>%
add_row(variable = "Pca_indiv_per_ml_dominance", axis_name = "Pca Dominance (%)") %>%
add_row(variable = "Spi_indiv_per_ml_dominance", axis_name = "Spi Dominance (%)") %>%
add_row(variable = "Spi_te_indiv_per_ml_dominance", axis_name = "Spi te Dominance (%)") %>%
add_row(variable = "Tet_indiv_per_ml_dominance", axis_name = "Tet Dominance (%)") %>%
add_row(variable = "Ble_indiv_per_ml_dominance_d", axis_name = "Effect Size of Ble Dominance") %>%
add_row(variable = "Cep_indiv_per_ml_dominance_d", axis_name = "Effect Size of Cep Dominance") %>%
add_row(variable = "Col_indiv_per_ml_dominance_d", axis_name = "Effect Size of Col Dominance") %>%
add_row(variable = "Eug_indiv_per_ml_dominance_d", axis_name = "Effect Size of Eug Dominance") %>%
add_row(variable = "Eup_indiv_per_ml_dominance_d", axis_name = "Effect Size of Eup Dominance") %>%
add_row(variable = "Lox_indiv_per_ml_dominance_d", axis_name = "Effect Size of Lox Dominance") %>%
add_row(variable = "Pau_indiv_per_ml_dominance_d", axis_name = "Effect Size of Pau Dominance") %>%
add_row(variable = "Pca_indiv_per_ml_dominance_d", axis_name = "Effect Size of Pca Dominance") %>%
add_row(variable = "Spi_indiv_per_ml_dominance_d", axis_name = "Effect Size of Spi Dominance") %>%
add_row(variable = "Spi_te_indiv_per_ml_dominance_d", axis_name = "Effect Size of Spi te Dominance") %>%
add_row(variable = "Tet_indiv_per_ml_dominance_d", axis_name = "Effect Size of Tet Dominance") %>%
add_row(variable = "log_size_class", axis_name = "Log Size (µm²)") %>%
add_row(variable = "class_indiv_per_µl", axis_name = "Density (ind/ml)") %>%
add_row(variable = "median_body_area_µm2", axis_name = "Median Body Size (µm²)") %>%
add_row(variable = "median_body_area_µm2_d", axis_name = "Effect Size of Median Body Size")
# --- SET PARAMETERS FOR MODELLING --- #
# Decide which time points you want to include in the analysis. We chose the
# first time point as 2 because it's after the first disturbance.
time_point_of_baselines = 1
time_points_model = 2:4
# Decide optimizers for mixed effect models
optimizer_input = 'Nelder_Mead'
method_input = ''
ds_individuals)# --- IMPORT DATASETS --- #
# Loop through time points to import them and modify them
for(time_point in time_points){
# Note: We use time_point + 1 for 1-based indexing in R.
ds_individuals[[time_point + 1]] = read.csv(here("..",
"1_data",
paste0(
"t",
time_point,
".csv"))) %>%
mutate(comment = NULL,
# To remember that some videos are replicates of the same ecosystem
# at a time point, introduce the column video_replicate. For all time
# points other than time point 0, it should be 1, as at each time
# point only a single video has been taken for each ecosystem. For
# time point 0, assign the value of file, which is the number of
# video that had been taken.
video_replicate = ifelse(time_point > 0,
yes = as.character(1),
no = as.character(file)),
# To know which ecosystem was filmed, assign to ecosystem ID the
# value of file, as ecosystems were filmed in numerical order.
# Assign NA to videos at time point 0 because we filmed the master
# bottles of autotrophic and heterotrophic ecosystems and not the
# single ecosystems.
ecosystem_ID = ifelse(time_point > 0,
yes = as.character(file),
no = as.character(NA)),
# Derive biomass information
bioarea_µm2 = mean_area,
bioarea_mm2 = bioarea_µm2 * 10^-6,
bioarea_mm2_per_ml = bioarea_mm2 / volume_recorded_ml,
bioarea_mm2_per_frame_per_ml = bioarea_mm2_per_ml * N_frames /
total_frames) %>%
# To tidy up, reorder columns.
select(time_point,
day,
video_replicate,
file,
id,
everything())
}
# --- EXTEND T0 --- #
# Assign to every ecosystem the videos of its trophic type at time point 0.
for(ID in 1:n_ecosystems){
ds_individuals_t0_extending[[ID]] = ds_individuals[[1]] %>%
# To allocate the autotrophic videos to autotrophic ecosystems and the
# heterotrophic videos to heterotrophic ecosystems, choose autotrophic
# videos for autotrophic ecosystems and heterotrophic videos for
# heterotrophic ecosystems.
filter(
(ID %in% ecosystem_IDs_autotrophic & trophic_type == "autotrophic") |
(ID %in% ecosystem_IDs_heterotrophic & trophic_type == "heterotrophic")) %>%
# To have more informative columns, manipulate them.
mutate(file = as.character(str_extract(file, "\\d+")),
ecosystem_ID = as.character(ID),
# To know that different individuals belong to different videos, add the
# column video replicates. The autotrophic videos start from video 1.
# The heterotrophic videos start from video 7.
video_replicate = as.character(file)) %>%
# To tidy up, reorder columns.
select(time_point,
day,
ecosystem_ID,
video_replicate,
file,
id,
everything())
}
# --- BIND EVERYTHING TOGETHER --- #
ds_individuals[[1]] = ds_individuals_t0_extending %>%
bind_rows()
ds_individuals = ds_individuals %>%
bind_rows()
# --- FINISH UP DS_INDIVIDUALS --- #
ds_individuals = ds_individuals %>%
mutate(
# To avoid that trophic_type gives problems when combining ds_individuals with
# ecosystem_info, get rid of it, as it had the right value only at time
# point 0.
trophic_type = NULL,
# To better work with ecosystem_ID, video_replicate, and file columns,
# transform them into a double format.
ecosystem_ID = as.double(str_extract(ecosystem_ID, "\\d+")),
video_replicate = as.double(str_extract(video_replicate, "\\d+")),
file = as.double(str_extract(file, "\\d+"))) %>%
# To have all the information on the ecosystems that were not in the original
# data, bind 'ds_individuals' with 'ecosystem_info'.
left_join(ecosystem_info,
by = "ecosystem_ID") %>%
# To tidy up, reorder columns.
select(time_point,
day,
video_replicate,
ecosystem_ID,
system_nr,
ecosystem_type,
ecosystem_size,
ecosystem_size_ml,
trophic_type,
ecosystem_size_and_trophy,
metaecosystem_type,
connection,
metaeco_type_n_connection,
bioarea_mm2,
bioarea_mm2_per_frame_per_ml,
N_frames,
all_of(columns_for_species_ID))
# --- CREATE TRAINING DATASET --- #
# Create a dataset excluding the species "Ble" for threshold 13.
# This is necessary because at threshold 13, we could not eliminate
# the food source (Chi) for "Ble".
training_thresh_13 = read.csv(here("..",
"1_data",
"training_threshold_13.csv")) %>%
# To make it easier to handle the level, transform Spi te into Spi_te.
mutate(species = case_when(species == "Spi te " ~ "Spi_te",
TRUE ~ species)) %>%
# To tidy up, reorder columns.
select(file,
id,
species,
N_frames,
mean_area,
everything()) %>%
filter(species != "Ble")
# Create a dataset including only the species "Ble" for threshold 40.
# At threshold 40, we successfully eliminated the food source (Chi)
# for "Ble", allowing for its inclusion in the analysis.
training_thresh_40 = read.csv(here("..",
"1_data",
"training_threshold_40.csv")) %>%
# To tidy up, reorder columns.
select(file,
id,
species,
N_frames,
mean_area,
everything()) %>%
filter(species == "Ble")
# Bind the two previous datasets
training_individuals = rbind(training_thresh_13,
training_thresh_40)
# --- CREATE PREDICTIVE MODEL --- #
# See where we previously set the parameters for a description of these variables.
species_ID_model = svm(factor(species) ~
mean_grey +
sd_grey +
mean_area +
sd_area +
mean_perimeter +
mean_turning +
sd_turning +
sd_perimeter +
mean_major +
sd_major +
mean_minor +
sd_minor +
mean_ar +
sd_ar +
duration +
max_net +
net_disp +
net_speed +
gross_disp +
max_step +
min_step +
sd_step +
sd_gross_speed +
max_gross_speed +
min_gross_speed ,
data = training_individuals,
probability = T,
na.action = na.pass)
# --- SHOW CONFUSION MATRIX --- #
# Create a confusion matrix comparing the fitted species predictions from the
# SVM model with the actual species labels from the training dataset.
confusion_matrix = table(species_ID_model$fitted, training_individuals$species)
# Create a copy of the confusion matrix to modify for error calculation.
confusion_matrix_with_diagonal_zeros = confusion_matrix
# Set the diagonal elements (true positives) to zero for error calculation.
# This allows us to compute the misclassification errors without counting the
# correct classifications.
diag(confusion_matrix_with_diagonal_zeros) = 0
# Calculate the class error for each species.
# Class error is defined as the ratio of misclassified samples to the total
# samples for each species.
confusion_matrix = cbind(
confusion_matrix,
class_error = rowSums(confusion_matrix_with_diagonal_zeros) /
rowSums(confusion_matrix)) %>%
as.data.frame() %>%
mutate(class_error_percentage = class_error * 100,
class_error = NULL)
# Display the final confusion matrix with class errors as a data frame.
confusion_matrix
## Ble Cep Col Eug Eup Lox Pau Pca Spi Spi_te Tet class_error_percentage
## Ble 115 0 0 0 0 0 0 0 3 2 0 4.1666667
## Cep 1 129 0 0 4 5 8 1 10 2 0 19.3750000
## Col 0 0 291 0 1 1 2 0 2 0 0 2.0202020
## Eug 1 0 0 576 1 0 0 0 25 3 16 7.3954984
## Eup 0 0 0 0 113 0 2 0 0 0 0 1.7391304
## Lox 0 0 0 0 5 169 1 0 0 0 0 3.4285714
## Pau 4 0 0 0 2 0 94 2 0 0 0 7.8431373
## Pca 0 0 0 0 1 0 0 148 0 0 0 0.6711409
## Spi 4 3 2 0 0 0 1 5 238 0 1 6.2992126
## Spi_te 0 0 0 0 0 0 0 0 0 107 0 0.0000000
## Tet 2 0 0 0 0 5 0 0 3 1 137 7.4324324
# --- ID SPECIES --- #
species_vector = ds_individuals %>%
select(trophic_type, mean_grey:min_gross_speed) %>%
mutate(species = as.character(predict(object = species_ID_model,
.,
type = "response")),
# Given that autotrophic ecosystems are exclusively composed of
# Euglena gracilis (Eug), we can simply designate Eug as the species
# for each individual in autotrophic ecosystems.
species = ifelse(trophic_type == "autotrophic",
"Eug",
species)) %>%
select(species)
ds_individuals = cbind(ds_individuals, species_vector)
ds_ecosystems)# --- SUMMARISE ECOSYSTEM-LEVEL METRICS --- #
ds_ecosystems = ds_individuals %>%
#Summarise for each species their bioarea and nr of individuals
group_by_at(vars(time_point:metaeco_type_n_connection,
species)) %>%
summarise(bioarea_mm2_per_ml = sum(bioarea_mm2_per_frame_per_ml),
indiv_per_ml = sum(N_frames) / total_frames,
median_body_size_mm2 = median(bioarea_mm2),
median_body_size_µm2 = median_body_size_mm2 * 10^6) %>%
# Go from long to wide format
pivot_wider(names_from = species,
values_from = c(bioarea_mm2_per_ml,
indiv_per_ml)) %>%
# Rename the resulting columns for clarity
rename(Ble_indiv_per_ml = indiv_per_ml_Ble,
Cep_indiv_per_ml = indiv_per_ml_Cep,
Col_indiv_per_ml = indiv_per_ml_Col,
Eug_indiv_per_ml = indiv_per_ml_Eug,
Eup_indiv_per_ml = indiv_per_ml_Eup,
Lox_indiv_per_ml = indiv_per_ml_Lox,
Pau_indiv_per_ml = indiv_per_ml_Pau,
Pca_indiv_per_ml = indiv_per_ml_Pca,
Spi_indiv_per_ml = indiv_per_ml_Spi,
Spi_te_indiv_per_ml = indiv_per_ml_Spi_te,
Tet_indiv_per_ml = indiv_per_ml_Tet,
Ble_bioarea_mm2_per_ml = bioarea_mm2_per_ml_Ble,
Cep_bioarea_mm2_per_ml = bioarea_mm2_per_ml_Cep,
Col_bioarea_mm2_per_ml = bioarea_mm2_per_ml_Col,
Eug_bioarea_mm2_per_ml = bioarea_mm2_per_ml_Eug,
Eup_bioarea_mm2_per_ml = bioarea_mm2_per_ml_Eup,
Lox_bioarea_mm2_per_ml = bioarea_mm2_per_ml_Lox,
Pau_bioarea_mm2_per_ml = bioarea_mm2_per_ml_Pau,
Pca_bioarea_mm2_per_ml = bioarea_mm2_per_ml_Pca,
Spi_bioarea_mm2_per_ml = bioarea_mm2_per_ml_Spi,
Spi_te_bioarea_mm2_per_ml = bioarea_mm2_per_ml_Spi_te,
Tet_bioarea_mm2_per_ml = bioarea_mm2_per_ml_Tet) %>%
# Average across videos and calculate metrics. Do that to have a single
# value at time point 0.
group_by_at(vars(time_point:metaeco_type_n_connection)) %>%
summarise(across(contains("bioarea_mm2_per_ml"),
sum,
na.rm = TRUE),
across(contains("indiv_per_ml"),
sum,
na.rm = TRUE),
median_body_size_µm2 = mean(median_body_size_µm2)) %>%
# Calculate ecosystem-level metrics
mutate(
# Calculate biomass and individuals of ecosystems.
bioarea_mm2_per_ml = sum(across(contains("bioarea_mm2_per_ml"))),
bioarea_tot_mm2 = bioarea_mm2_per_ml * ecosystem_size_ml,
indiv_per_ml = sum(across(contains("indiv_per_ml"))),
# Transform species densities from NA to 0
across(all_of(protist_species_indiv_per_ml), ~ replace_na(., 0)),
across(all_of(protist_species_bioarea_mm2_per_ml), ~ replace_na(., 0)),
# Calculate biodiversity of ecosystems
species_richness = specnumber(across(ends_with("_indiv_per_ml"))),
shannon = diversity(across(ends_with("_bioarea_mm2_per_ml")),
index = "shannon"),
shannon = ifelse(species_richness == 0,
yes = NA,
no = shannon),
evenness_pielou = shannon / log(species_richness),
# Calculate species dominance
across(all_of(protist_species_indiv_per_ml),
~ (.x / indiv_per_ml) * 100,
.names = "{.col}_dominance"),
across(all_of(protist_species_bioarea_mm2_per_ml),
~ (.x / indiv_per_ml) * 100,
.names = "{.col}_dominance"),
# Do transformations
log10_bioarea_mm2_per_ml = log10(bioarea_mm2_per_ml),
ln_bioarea_mm2_per_ml = log(bioarea_mm2_per_ml),
sqrt_bioarea_mm2_per_ml = sqrt(bioarea_mm2_per_ml),
cbrt_bioarea_mm2_per_ml = bioarea_mm2_per_ml^(1/3),
sqr_bioarea_mm2_per_ml = bioarea_mm2_per_ml^(2)) %>%
# To have a single value for each ecosystem at a time point, average video replicates.
group_by(across(all_of(c("time_point",
"day",
"system_nr",
"ecosystem_ID",
"metaecosystem_type",
"ecosystem_type",
"ecosystem_size",
"ecosystem_size_ml",
"trophic_type",
"ecosystem_size_and_trophy",
"connection",
"metaeco_type_n_connection")))) %>%
summarise(across(contains("_per_ml"), mean),
across(contains("tot"), mean),
species_richness = mean(species_richness),
shannon = mean(shannon),
evenness_pielou = mean(evenness_pielou),
median_body_size_µm2 = mean(median_body_size_µm2)) %>%
ungroup() %>%
# To tidy up, select useful columns.
select(time_point,
day,
system_nr,
ecosystem_ID,
metaecosystem_type,
metaeco_type_n_connection,
ecosystem_type,
ecosystem_size,
ecosystem_size_ml,
trophic_type,
connection,
ecosystem_size_and_trophy,
bioarea_mm2_per_ml,
log10_bioarea_mm2_per_ml,
ln_bioarea_mm2_per_ml,
sqrt_bioarea_mm2_per_ml,
cbrt_bioarea_mm2_per_ml,
sqr_bioarea_mm2_per_ml,
bioarea_tot_mm2,
indiv_per_ml,
species_richness,
shannon,
evenness_pielou,
median_body_size_µm2,
any_of(protist_species_bioarea_mm2_per_ml),
any_of(protist_species_indiv_per_ml),
any_of(protist_species_dominance))
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(contains("bioarea_mm2_per_ml"), sum, na.rm = TRUE)`.
## ℹ In group 1: `time_point = 0`, `day = 0`, `video_replicate = 1`, `ecosystem_ID
## = 1`, `system_nr = 1`, `ecosystem_type = small autotrophic unconnected`,
## `ecosystem_size = "small"`, `ecosystem_size_ml = 7.5`, `trophic_type =
## "autotrophic"`, `ecosystem_size_and_trophy = small autotrophic`,
## `metaecosystem_type = heterotrophic-dominated`, `connection = unconnected`,
## `metaeco_type_n_connection = "heterotrophic-dominated unconnected"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
ds_dominances_with_CI)Get ds_dominances_with_CI with all the mean and 95 % CI dominance of all the species.
# To be able to plot dominance for multiple species, get their mean, upper boundary, and lower boundary in a long format.
dominances_mean = ds_ecosystems %>%
group_by(day, time_point, ecosystem_type) %>%
reframe(Ble = mean(Ble_bioarea_mm2_per_ml_dominance, na.rm = TRUE),
Cep = mean(Cep_bioarea_mm2_per_ml_dominance, na.rm = TRUE),
Col = mean(Col_bioarea_mm2_per_ml_dominance, na.rm = TRUE),
Eug = mean(Eug_bioarea_mm2_per_ml_dominance, na.rm = TRUE),
Eup = mean(Eup_bioarea_mm2_per_ml_dominance, na.rm = TRUE),
Lox = mean(Lox_bioarea_mm2_per_ml_dominance, na.rm = TRUE),
Pau = mean(Pau_bioarea_mm2_per_ml_dominance, na.rm = TRUE),
Pca = mean(Pca_bioarea_mm2_per_ml_dominance, na.rm = TRUE),
Spi = mean(Spi_bioarea_mm2_per_ml_dominance, na.rm = TRUE),
Spi_te = mean(Spi_te_bioarea_mm2_per_ml_dominance, na.rm = TRUE),
Tet = mean(Tet_bioarea_mm2_per_ml_dominance, na.rm = TRUE)) %>%
pivot_longer(cols = -c("day", "time_point", "ecosystem_type"),
names_to = "species",
values_to = "mean_dominance")
dominances_lower = ds_ecosystems %>%
group_by(day, time_point, ecosystem_type) %>%
reframe(Ble = quantile(Ble_bioarea_mm2_per_ml_dominance, probs = 0.025, na.rm = TRUE),
Cep = quantile(Cep_bioarea_mm2_per_ml_dominance, probs = 0.025, na.rm = TRUE),
Col = quantile(Col_bioarea_mm2_per_ml_dominance, probs = 0.025, na.rm = TRUE),
Eug = quantile(Eug_bioarea_mm2_per_ml_dominance, probs = 0.025, na.rm = TRUE),
Eup = quantile(Eup_bioarea_mm2_per_ml_dominance, probs = 0.025, na.rm = TRUE),
Lox = quantile(Lox_bioarea_mm2_per_ml_dominance, probs = 0.025, na.rm = TRUE),
Pau = quantile(Pau_bioarea_mm2_per_ml_dominance, probs = 0.025, na.rm = TRUE),
Pca = quantile(Pca_bioarea_mm2_per_ml_dominance, probs = 0.025, na.rm = TRUE),
Spi = quantile(Spi_bioarea_mm2_per_ml_dominance, probs = 0.025, na.rm = TRUE),
Spi_te = quantile(Spi_te_bioarea_mm2_per_ml_dominance, probs = 0.025, na.rm = TRUE),
Tet = quantile(Tet_bioarea_mm2_per_ml_dominance, probs = 0.025, na.rm = TRUE)) %>%
pivot_longer(cols = -c("day", "time_point", "ecosystem_type"),
names_to = "species",
values_to = "lower_95_CI")
dominances_upper = ds_ecosystems %>%
group_by(day, time_point, ecosystem_type) %>%
reframe(Ble = quantile(Ble_bioarea_mm2_per_ml_dominance, probs = 0.975, na.rm = TRUE),
Cep = quantile(Cep_bioarea_mm2_per_ml_dominance, probs = 0.975, na.rm = TRUE),
Col = quantile(Col_bioarea_mm2_per_ml_dominance, probs = 0.975, na.rm = TRUE),
Eug = quantile(Eug_bioarea_mm2_per_ml_dominance, probs = 0.975, na.rm = TRUE),
Eup = quantile(Eup_bioarea_mm2_per_ml_dominance, probs = 0.975, na.rm = TRUE),
Lox = quantile(Lox_bioarea_mm2_per_ml_dominance, probs = 0.975, na.rm = TRUE),
Pau = quantile(Pau_bioarea_mm2_per_ml_dominance, probs = 0.975, na.rm = TRUE),
Pca = quantile(Pca_bioarea_mm2_per_ml_dominance, probs = 0.975, na.rm = TRUE),
Spi = quantile(Spi_bioarea_mm2_per_ml_dominance, probs = 0.975, na.rm = TRUE),
Spi_te = quantile(Spi_te_bioarea_mm2_per_ml_dominance, probs = 0.975, na.rm = TRUE),
Tet = quantile(Tet_bioarea_mm2_per_ml_dominance, probs = 0.975, na.rm = TRUE)) %>%
pivot_longer(cols = -c("day", "time_point", "ecosystem_type"),
names_to = "species",
values_to = "upper_95_CI")
ds_dominances_with_CI = reduce(list(dominances_mean,
dominances_lower,
dominances_upper),
full_join,
by = NULL)
ds_ecosystems_effect_size)To analyse how strong the effects of the connection on the biomass,
biodiversity, and population densities within ecosystems were, we want
to compile a dataset (ds_ecosystems_effect_size) where each
row represents the effect size of a connected vs unconnected ecosystem
type (e.g., small autotrophic connected vs unconnected) at a time point.
If the 95% CI of the effect size crosses the zero line it means that it
is statistically significant. We use the effect size Hedge’s d (aka
Hedge’s g), as it can handle the treatment and/or control equal to zero
(log response ratio cannot: ln(treatment/0) = Inf; ln(0/control) = -inf;
ln(0/0) = Nan), small sample sizes (“it works well when there are as
few as five to ten studies”, @Rosenberg2013), and unequal variance between
treatment and control ([@Rosenberg2013]).
Hedge’s d (d) is the difference in mean between treatment and control
divided by by their weighted spread (denominator) multiplied by a factor
(J) that controls for small sample sizes [@Rosenberg2013]. The 95% confidence interval of
Hedge’s d is calculated from its standard error (SE) as [@Hedges1985] \(d ±
1.96 * SE\) where the standard error is calculated as show in
@Borenstein2009.
We calculate Hedge’s d and its 95 % confidence interval in three steps. (1) Calculate means, standard deviations and sample sizes. Calculate the mean of the treatment (Y1) and control (Y2), the standard deviation of the treatment (s1) and the control (s2), as well as the sample size of the treatment (n1) and the control (n2) at each time point and for each response variable. (2) Calculate Hedge’s d. Use means, standard deviations and sample sizes to calculate effect sizes. (3) Retain only the treatments (connected ecosystems), discarding the controls (unconnected). This is because we calculated the effect size of only connected compared to unconnected and not the other way around.
\[ d = \frac{Y1 - Y2}{denominator} * J \]
\[ denominator = \sqrt{\frac{(n_1-1)*s_1^2 + (n_2 - 1) * s_2^2}{n_1 + n_2 - 2}} \]
\[ J = 1 - \frac{3}{(4 * (n_1 + n_2 - 2)) - 1} \]
\[ SE = \sqrt{J^2 * \frac{n_1 + n_2}{n_1*n_2} + \frac{d^2}{2*(n_1 + n_2)}} \]
## - (1) Calculate means, standard deviations and sample sizes. - ##
for (variable in 1:length(variables_ecosystems)) {
ds_ecosystems_effect_size[[variable]] = ds_ecosystems %>%
filter(
# To have a cleaner dataset filter out time point 0, as ecosystems were assumed to be the same as the bottles from which they were started, as we only measured these bottles and not the ecosystems.
time_point != 0,
# To have a cleaner dataset filter out ecosystems in which this variable could not be computed (e.g., species richness of a crashed culture).
!is.na(!!sym(variables_ecosystems[variable]))) %>%
# To afterwards calculate effect sizes, get the mean, sd, and sample size of treatments at each time point
group_by(across(all_of(c("time_point",
"day",
"ecosystem_type",
"ecosystem_size_and_trophy",
"connection",
"ecosystem_size",
"ecosystem_size_ml",
"trophic_type",
"metaecosystem_type")))) %>%
summarise(across(all_of(variables_ecosystems[variable]),
list(mean = mean,
sd = sd)),
sample_size = n()) %>%
# To know of which variable the sample size is when you afterwards bind columns, add the name of the variable to the name of the sample size column.
rename_with( ~ paste0(variables_ecosystems[variable],
"_sample_size"),
matches("sample_size"))
}
# To have in the rows treatment and in the columns means, sd, and sample size, bind columns together.
ds_ecosystems_effect_size <- reduce(ds_ecosystems_effect_size,
full_join,
by = c("time_point",
"day",
"ecosystem_type",
"ecosystem_size_and_trophy",
"connection",
"ecosystem_size",
"ecosystem_size_ml",
"trophic_type",
"metaecosystem_type"))
## - Calculate Hedge's d. - ##
# Initialise the effect size columns
for (variable in length(variables_ecosystems)) {
ds_ecosystems_effect_size <- ds_ecosystems_effect_size %>%
mutate(!!paste0(variables_ecosystems[variable], "_d") := NA,
!!paste0(variables_ecosystems[variable], "_d_upper") := NA,
!!paste0(variables_ecosystems[variable], "_d_lower") := NA)
}
row_n = 0
# For each treatment at each time point, calculate effect size and 95% CI
for (treatment in 1:nrow(treatments_and_controls)) {
for (time_p in 1:length(time_points_for_ES)) {
row_n = row_n + 1
treatment_row = ds_ecosystems_effect_size %>%
filter(ecosystem_type == treatments_and_controls$treatment[treatment],
time_point == time_points_for_ES[time_p])
control_row = ds_ecosystems_effect_size %>%
filter(ecosystem_type == treatments_and_controls$control[treatment],
time_point == time_points_for_ES[time_p])
for (variable in 1:length(variables_ecosystems)) {
hedges_d = calculate.hedges_d(
treatment_row[[paste0(variables_ecosystems[variable], "_mean")]],
treatment_row[[paste0(variables_ecosystems[variable], "_sd")]],
treatment_row[[paste0(variables_ecosystems[variable], "_sample_size")]],
control_row[[paste0(variables_ecosystems[variable], "_mean")]],
control_row[[paste0(variables_ecosystems[variable], "_sd")]],
control_row[[paste0(variables_ecosystems[variable], "_sample_size")]])
ds_ecosystems_effect_size[[paste0(variables_ecosystems[variable], "_d")]][
ds_ecosystems_effect_size$ecosystem_type ==
treatments_and_controls$treatment[treatment] &
ds_ecosystems_effect_size$time_point ==
time_points_for_ES[time_p]] =
hedges_d$d
ds_ecosystems_effect_size[[paste0(variables_ecosystems[variable], "_d_upper")]][
ds_ecosystems_effect_size$ecosystem_type ==
treatments_and_controls$treatment[treatment] &
ds_ecosystems_effect_size$time_point ==
time_points_for_ES[time_p]] =
hedges_d$upper_CI
ds_ecosystems_effect_size[[paste0(variables_ecosystems[variable], "_d_lower")]][
ds_ecosystems_effect_size$ecosystem_type ==
treatments_and_controls$treatment[treatment] &
ds_ecosystems_effect_size$time_point ==
time_points_for_ES[time_p]] =
hedges_d$lower_CI
}
}
}
## - Retain only the connected ecosystems, discarding the controls (unconnected). - ##
ds_ecosystems_effect_size = ds_ecosystems_effect_size %>%
filter(connection == "connected")
However, Hedge’s d because of how much it depends on the pooled
spread of the treatment and control might be harder to interpret than
the log response ratio. Therefore, even though the log response ratio
might not be calculated for all the protist species, we also decide to
calculate it as well. To calculate the log response ratio of the
dominance of protist species I use here the package
SingleCaseES (see this
link). This package. The package provides the function
LRRi which calculates effect sizes for a single study case.
It also adjust for small sample sizes by default.
## - Calculate the effect size (Log Response Ratio). - ##
# Initialise the effect size columns
for (variable in length(protist_species_dominance)) {
ds_ecosystems_effect_size <- ds_ecosystems_effect_size %>%
mutate(!!paste0(protist_species_dominance[variable], "_LRR") := NA,
!!paste0(protist_species_dominance[variable], "_LRR_upper") := NA,
!!paste0(protist_species_dominance[variable], "_LRR_lower") := NA)
}
row_n = 0
# For each treatment, time point, and variable calculate LRR and 95% CI
for (treatment in 1:nrow(treatments_and_controls)) {
for (time_p in 1:length(time_points_for_ES)) {
for (variable in 1:length(protist_species_dominance)) {
row_n = row_n + 1
treatment_values = ds_ecosystems %>%
filter(ecosystem_type == treatments_and_controls$treatment[treatment],
time_point == time_points_for_ES[time_p],
!is.na(!!sym(protist_species_dominance[variable]))) %>%
pull(!!sym(protist_species_dominance[variable]))
control_values = ds_ecosystems %>%
filter(ecosystem_type == treatments_and_controls$control[treatment],
time_point == time_points_for_ES[time_p],
!is.na(!!sym(protist_species_dominance[variable]))) %>%
pull(!!sym(protist_species_dominance[variable]))
# By default it corrects for small sample sizes and calculates 95 % CI.
LRR_output = LRRi(A_data = control_values,
B_data = treatment_values,
scale = "proportion")
ds_ecosystems_effect_size[[paste0(protist_species_dominance[variable], "_LRR")]][
ds_ecosystems_effect_size$ecosystem_type ==
treatments_and_controls$treatment[treatment] &
ds_ecosystems_effect_size$time_point ==
time_points_for_ES[time_p]] =
LRR_output$Est
ds_ecosystems_effect_size[[paste0(protist_species_dominance[variable], "_LRR_upper")]][
ds_ecosystems_effect_size$ecosystem_type ==
treatments_and_controls$treatment[treatment] &
ds_ecosystems_effect_size$time_point ==
time_points_for_ES[time_p]] =
LRR_output$CI_upper
ds_ecosystems_effect_size[[paste0(protist_species_dominance[variable], "_LRR_lower")]][
ds_ecosystems_effect_size$ecosystem_type ==
treatments_and_controls$treatment[treatment] &
ds_ecosystems_effect_size$time_point ==
time_points_for_ES[time_p]] =
LRR_output$CI_lower
}
}
}
ds_species_effect_sizes_d)# To be able to plot dominance for multiple species, get the effect sizes, their upper boundary, and their lower boundary in a long format.
filtered_data = ds_ecosystems_effect_size %>%
ungroup() %>%
filter(connection == "connected",
trophic_type == "heterotrophic") %>%
filter(time_point > 1) %>%
# To have a cleaner dataset that is easier to look at, select only the columns of the time point, ecosystem type, and protist species dominance (w/ CI).
select(time_point,
ecosystem_type,
paste0(protist_species_dominance, "_d"),
paste0(protist_species_dominance, "_d_upper"),
paste0(protist_species_dominance, "_d_lower"))
ds_species_effect_sizes_d = filtered_data %>%
pivot_longer(cols = ends_with("dominance_d"),
names_to = "species",
values_to = "effect_size") %>%
select(time_point,
ecosystem_type,
species,
effect_size)
ds_species_effect_sizes_d_upper = filtered_data %>%
pivot_longer(cols = ends_with("dominance_d_upper"),
names_to = "species",
values_to = "upper_95_CI") %>%
select(upper_95_CI)
ds_species_effect_sizes_d_lower = filtered_data %>%
pivot_longer(cols = ends_with("dominance_d_lower"),
names_to = "species",
values_to = "lower_95_CI") %>%
select(lower_95_CI)
ds_species_effect_sizes_d = cbind(ds_species_effect_sizes_d, ds_species_effect_sizes_d_upper, ds_species_effect_sizes_d_lower) %>%
mutate(species = str_remove(species, "_bioarea_mm2_per_ml_dominance_d")) %>%
# To understand if there is an effect on size distribution, plot the effects of connection on dominance by disposing species smallest to largest.
mutate(species = factor(x = species,
levels = c("Tet", "Col", "Eup", "Pau", "Cep", "Lox", "Pca", "Spi_te", "Ble", "Spi")))
ds_species_effect_sizes_LRR)# To be able to plot dominance for multiple species, get the effect sizes, their upper boundary, and their lower boundary in a long format.
filtered_data = ds_ecosystems_effect_size %>%
ungroup() %>%
filter(connection == "connected",
trophic_type == "heterotrophic") %>%
filter(time_point > 1) %>%
# To have a cleaner dataset that is easier to look at, select only the columns of the time point, ecosystem type, and protist species dominance (w/ CI).
select(time_point,
ecosystem_type,
paste0(protist_species_dominance, "_LRR"),
paste0(protist_species_dominance, "_LRR_upper"),
paste0(protist_species_dominance, "_LRR_lower"))
ds_species_effect_sizes_LRR = filtered_data %>%
pivot_longer(cols = ends_with("dominance_LRR"),
names_to = "species",
values_to = "effect_size") %>%
select(time_point,
ecosystem_type,
species,
effect_size)
ds_species_effect_sizes_LRR_upper = filtered_data %>%
pivot_longer(cols = ends_with("dominance_LRR_upper"),
names_to = "species",
values_to = "upper_95_CI") %>%
select(upper_95_CI)
ds_species_effect_sizes_LRR_lower = filtered_data %>%
pivot_longer(cols = ends_with("dominance_LRR_lower"),
names_to = "species",
values_to = "lower_95_CI") %>%
select(lower_95_CI)
ds_species_effect_sizes_LRR = cbind(ds_species_effect_sizes_LRR, ds_species_effect_sizes_LRR_upper, ds_species_effect_sizes_LRR_lower) %>%
mutate(species = str_remove(species, "_bioarea_mm2_per_ml_dominance_LRR")) %>%
# To understand if there is an effect on size distribution, plot the effects of connection on dominance by disposing species smallest to largest.
mutate(species = factor(x = species,
levels = c("Tet", "Col", "Eup", "Pau", "Cep", "Lox", "Pca", "Spi_te", "Ble", "Spi")))
ds_metaecosystems)# --- IDENTIFY ECOSYSTEM COMBINATIONS THAT CONSTITUTE CONNECTED META-ECOSYSTEMS --- #
combinations_connected = ds_ecosystems %>%
filter(
# To have the information about to which system nr and metaecosystem type
# each ecosystem belongs, we select one of the time points to don't have
# multiple information of the same ecosystem.
time_point == 0,
# To create only combinations for connected metaecosystems,
# we filter the combinations of only connected ecosystems.
connection == "connected") %>%
select(system_nr,
ecosystem_ID,
metaecosystem_type,
connection,
metaeco_type_n_connection) %>%
# To know which ecosystems were combined to for the connected meta-ecosystem,
# assign the ecosystem ID to the first and second ecosystems as mean ± 0.5.
# This is because the two ecosystems of a meta-ecosystem are two integers
# next to each other (e.g., 31 and 32).
group_by(system_nr,
metaecosystem_type,
connection,
metaeco_type_n_connection) %>%
summarise(ID_first_ecosystem = (mean(ecosystem_ID) - 0.5),
ID_second_ecosystem = (mean(ecosystem_ID) + 0.5)) %>%
mutate(ecosystems_combined = paste0(ID_first_ecosystem,
"|",
ID_second_ecosystem)) %>%
as.data.frame()
# --- IDENTIFY THE COMBINATIONS OF ECOSYSTEMS THAT CONSTITUTE UNCONNECTED META-ECOSYSTEMS --- #
# Determine ecosystems IDs of unconnected autotrophic ecosystems
ID_unconnected_S_autotrophic = ds_ecosystems %>%
filter(ecosystem_type == "small autotrophic unconnected") %>%
pull(ecosystem_ID) %>%
unique()
ID_unconnected_M_autotrophic = ds_ecosystems %>%
filter(ecosystem_type == "medium autotrophic unconnected") %>%
pull(ecosystem_ID) %>%
unique()
ID_unconnected_L_autotrophic = ds_ecosystems %>%
filter(ecosystem_type == "large autotrophic unconnected") %>%
pull(ecosystem_ID) %>%
unique()
# Determine ecosystems IDs of unconnected heterotrophic ecosystems
ID_unconnected_S_heterotrophic = ds_ecosystems %>%
filter(ecosystem_type == "small heterotrophic unconnected") %>%
pull(ecosystem_ID) %>%
unique()
ID_unconnected_M_heterotrophic = ds_ecosystems %>%
filter(ecosystem_type == "medium heterotrophic unconnected") %>%
pull(ecosystem_ID) %>%
unique()
ID_unconnected_L_heterotrophic = ds_ecosystems %>%
filter(ecosystem_type == "large heterotrophic unconnected") %>%
pull(ecosystem_ID) %>%
unique()
# Find combinations
combinations_heterotrophic_dominated = crossing(ID_unconnected_L_heterotrophic,
ID_unconnected_S_autotrophic) %>%
mutate(metaecosystem_type = "heterotrophic-dominated",
connection = "unconnected") %>%
rename(ID_first_ecosystem = ID_unconnected_L_heterotrophic,
ID_second_ecosystem = ID_unconnected_S_autotrophic) %>%
select(metaecosystem_type,
connection,
ID_first_ecosystem,
ID_second_ecosystem)
combinations_equally_dominated = crossing(ID_unconnected_M_heterotrophic,
ID_unconnected_M_autotrophic) %>%
mutate(metaecosystem_type = "equally-dominated",
connection = "unconnected") %>%
rename(ID_first_ecosystem = ID_unconnected_M_heterotrophic,
ID_second_ecosystem = ID_unconnected_M_autotrophic) %>%
select(metaecosystem_type,
connection,
ID_first_ecosystem,
ID_second_ecosystem)
combinations_autotrophic_dominated = crossing(ID_unconnected_S_heterotrophic,
ID_unconnected_L_autotrophic) %>%
mutate(metaecosystem_type = "autotrophic-dominated",
connection = "unconnected") %>%
rename(ID_first_ecosystem = ID_unconnected_S_heterotrophic,
ID_second_ecosystem = ID_unconnected_L_autotrophic) %>%
select(metaecosystem_type,
connection,
ID_first_ecosystem,
ID_second_ecosystem)
# Bind combinations
combinations_unconnected = rbind(combinations_autotrophic_dominated,
combinations_heterotrophic_dominated,
combinations_equally_dominated) %>%
# To have a system nr for all the unconnected meta-ecosystems, give them a
# nr that starts from 1000, so that they can be distinguished
# from the connected meta-ecosystems.
mutate(system_nr = 1001:(1000 + nrow(.)),
connection = "unconnected",
metaeco_type_n_connection = paste(metaecosystem_type, connection),
ecosystems_combined = paste0(ID_first_ecosystem, "|", ID_second_ecosystem)) %>%
select(system_nr,
metaecosystem_type,
connection,
metaeco_type_n_connection,
ID_first_ecosystem,
ID_second_ecosystem,
ecosystems_combined)
# --- BIND CONNECTED AND UNCONNECTED META-ECOSYSTEM COMBINATIONS --- #
ecosystem_combinations = rbind(combinations_connected,
combinations_unconnected)
# --- USE ECOSYSTEM COMBINATIONS TO CREATE DS_METAECOSYSTEMS --- #
# Set parameters
row_i = 0
# Create ds_metaecosystems
for (combination in 1 : nrow(ecosystem_combinations)) {
for (time_p in time_points) {
# Set parameters
row_i = row_i + 1
# Create ds_metaecosystems row
ds_metaecosystems[[row_i]] = ds_ecosystems %>%
filter(
ecosystem_ID %in% c(
ecosystem_combinations[combination, ]$ID_first_ecosystem,
ecosystem_combinations[combination, ]$ID_second_ecosystem),
time_point == time_p) %>%
# Calculate meta-ecosystem metrics
summarise(total_metaecosystem_bioarea_mm2 = sum(bioarea_tot_mm2)) %>%
mutate(time_point = time_p,
# To know on which day the meta-ecosystem was sampled, select the
# sampling day that is time_p + 1. This is because the first
# sampling day is at time point 0, so all the days are shifted of
# 1 on place (e.g., the sampling day of time point 1 is at
# sampling_days[2]).
day = sampling_days[time_p + 1],
system_nr = ecosystem_combinations[combination, ]$system_nr,
ecosystems_combined = ecosystem_combinations[combination, ]$ecosystems_combined,
metaecosystem_type = ecosystem_combinations[combination, ]$metaecosystem_type,
connection = ecosystem_combinations[combination, ]$connection,
metaeco_type_n_connection = paste(metaecosystem_type, connection)) %>%
ungroup()
}
}
# Finish up ds_metaecosystems
ds_metaecosystems = ds_metaecosystems %>%
bind_rows() %>%
as.data.frame() %>%
select(time_point,
day,
system_nr,
ecosystems_combined,
metaecosystem_type,
metaeco_type_n_connection,
connection,
total_metaecosystem_bioarea_mm2) %>%
filter(!system_nr %in% metaecosystems_to_take_off)
unconnected_combinations_sets &
sets_of_sets)In the previous section of code, we identified combinations of
ecosystems that form unconnected meta-ecosystems. However, directly
comparing all unconnected meta-ecosystems to connected ones would
inflate our degrees of freedom due to autocorrelation among unconnected
meta-ecosystems sharing the same ecosystem. To address this, we will
compare five connected meta-ecosystems to five unconnected
meta-ecosystems in the analysis, each comparison comprising different
combinations of unconnected meta-ecosystems. In each comparison, we
create a “combination set” of unconnected meta-ecosystems. For each
combination set (e.g., equally-dominated unconnected), we assign
numerical order to the first ecosystem type (e.g., medium heterotrophic)
and permute the order of the second type’s ecosystems (e.g., medium
autotrophic). For example, let’s sat we are trying to create combination
sets of equally-dominated unconnected where the first ecosystem type is
medium heterotrophic (1,2,3,4,5), and the second type is medium
autotrophic (6,7,8,9,10). Combinations of the first ecosystem type in
numerical order (1,2,3,4,5; 1,2,3,4,5; etc.) and permutations of the
second ecosystem type (e.g., 6,7,8,9,10; 6,7,8,10,9; etc.) would give us
the following: 1|6, 1|7, 1|8, 1|9, 1|10; 1|6, 1|7, 1|8, 1|10, 1|9; etc.
To create an object which combines heterotrophic-dominated,
equally-dominated, and autotrophic-dominated unconnected meta-ecosystem
sets (sets_of_sets) we need to go through the following
steps. (1) Create a function to combine unconnected meta-ecosystems into
sets where each ecosystem appears only once. (2) Find the combination
sets for heterotrophic-dominated, equally-dominated, and
autotrophic-dominated unconnected meta-ecosystems. (3) Find the sets of
sets in which you combine all three heterotrophic-dominated,
equally-dominated, and autotrophic-dominated unconnected meta-ecosystems
(sets_of_sets).
# --- CREATE FUNCTION TO COMBINE UNCONNECTED META-ECOSYSTEMS --- #
create.unconnected.sets = function(metaeco_type_n_connection_selected,
ecosystem_type_1,
ecosystem_type_2) {
# Find ID of the two ecosystems
ID_ecosystem_type_1 = ds_ecosystems %>%
filter(ecosystem_type == ecosystem_type_1) %>%
pull(ecosystem_ID) %>%
unique()
ID_ecosystem_type_2 = ds_ecosystems %>%
filter(ecosystem_type == ecosystem_type_2) %>%
pull(ecosystem_ID) %>%
unique()
# Create dataset with sets of unconnected meta-ecosystems
two_ecosystem_unconnected_sets <- data.frame(
# Give information on meta-ecosystem
metaeco_type = gsub(" unconnected", "", metaeco_type_n_connection_selected),
connection = "unconnected",
metaeco_type_n_connection = metaeco_type_n_connection_selected,
# Repeat the IDs of the ecosystem type 1 for as many times as the
# permutations of ecosystem type 2
ID_first_ecosystem = rep(ID_ecosystem_type_1,
times = length(permn(ID_ecosystem_type_2))),
# Permute the IDs of the ecosystem type 2
ID_second_ecosystem = unlist(flatten(permn(ID_ecosystem_type_2))),
# To have each set of unconnected meta-ecosystems to have a number, repeat
# the IDs of the ecosystem type ... for the number of ecosystem IDs of the
# ecosystem type ... (WARNING: if you want to do this code for another
# dataset it has to be done differently if you are taking off some
# ecosystems).
set = rep(1 : length(permn(ID_ecosystem_type_2)),
each = length(ID_ecosystem_type_1))) %>%
# To don't have problematic ecosystems, take them off (in our ecosystems
# we don't have to take them off though).
filter(!ID_first_ecosystem %in% ecosystems_to_take_off,
!ID_second_ecosystem %in% ecosystems_to_take_off) %>%
# To include other information for each unconnected metaecosystem, join
# with ecosystem_combinations.
full_join(ecosystem_combinations %>%
filter(metaeco_type_n_connection ==
metaeco_type_n_connection_selected)) %>%
select(set,
system_nr,
metaeco_type,
connection,
metaeco_type_n_connection,
ID_first_ecosystem,
ID_second_ecosystem,
ecosystems_combined)
# Return object 'two_ecosystem_unconnected_sets'
return(two_ecosystem_unconnected_sets)
}
## - (2) Find the combination sets for heterotrophic-dominated, equally-dominated, and autotrophic-dominated unconnected meta-ecosystems. - ##
unconnected_combinations_sets = rbind(
create.unconnected.sets("autotrophic-dominated unconnected",
"small heterotrophic unconnected",
"large autotrophic unconnected"),
create.unconnected.sets("heterotrophic-dominated unconnected",
"large heterotrophic unconnected",
"small autotrophic unconnected"),
create.unconnected.sets("equally-dominated unconnected",
"medium heterotrophic unconnected",
"medium autotrophic unconnected"))
## - (3) Find the sets of sets in which you combine all three heterotrophic-dominated, equally-dominated, and autotrophic-dominated unconnected meta-ecosystems (sets_of_sets). - ##
sets_of_sets = expand.grid(autotrophic_dominated_set_n = unconnected_combinations_sets %>%
filter(metaeco_type_n_connection == "autotrophic-dominated unconnected") %>%
pull(set) %>%
unique(),
heterotrophic_dominated_set_n = unconnected_combinations_sets %>%
filter(metaeco_type_n_connection == "heterotrophic-dominated unconnected") %>%
pull(set) %>%
unique(),
equally_dominated_set_n = unconnected_combinations_sets %>%
filter(metaeco_type_n_connection == "equally-dominated unconnected") %>%
pull(set) %>%
unique())
# --- DEFINE RESPONSE VARIABLE --- #
response_variable_selected = "total_metaecosystem_bioarea_mm2"
# --- PLOT ORIGINAL DATA --- #
plot.metaecos.points(ds_metaecosystems,
response_variable_selected,
3)
# --- PREPARE DATA FOR ANALYSIS --- #
# Add baselines
baselines = ds_metaecosystems %>%
filter(time_point == time_point_of_baselines) %>%
select(system_nr,
all_of(response_variable_selected)) %>%
rename(baseline = all_of(response_variable_selected))
data_for_analysis = ds_metaecosystems %>%
left_join(baselines)
# Filter
data_for_analysis = data_for_analysis %>%
filter(time_point %in% time_points_model,
!is.na(!!sym(response_variable_selected)))
# --- PLOT DATA FOR ANALYSIS --- #
plot.metaecos.points(data_for_analysis,
response_variable_selected,
3)
# --- DEFINE NR OF BOOTSTRAP ITERATIONS --- #
bootstrap_iterations = 1000
# --- COMPUTE STATS FOR ALL ECOSYSTEM COMBINATIONS --- #
# If you see "Warning in (function (iprint = 0L, maxfun = 10000L,
# FtolAbs = 1e-05, FtolRel = ## 1e-15, : unused control arguments ignored",
# ignore it. It just means that you didn't pass the control argument to the
# Nelder_Mead optimiser, so it uses the default.
# Initialise lists
set_data_for_analysis = list()
coefficients = list()
full_model = list()
# Get the number of rows in the sets_of_sets data frame
n_sets_of_sets = nrow(sets_of_sets)
# Generate random sets of indices for bootstrap iterations
random_nrs <- runif(bootstrap_iterations,
min = 1,
max = n_sets_of_sets) %>% round()
# Set the optimizer inputs for model fitting
optimizer_input = "optimx"
method_input = "L-BFGS-B"
# Loop through each randomly selected set of sets
for (i in 1:length(random_nrs)) {
# Get the current set of sets based on the random index
sets = sets_of_sets[random_nrs[i],]
# Identify system numbers that are unconnected based on the metaecosystem type
unconnected_system_nrs = unconnected_combinations_sets %>%
filter((metaeco_type_n_connection == "autotrophic-dominated unconnected" &
set == sets$autotrophic_dominated_set_n) |
(metaeco_type_n_connection == "equally-dominated unconnected" &
set == sets$equally_dominated_set_n) |
(metaeco_type_n_connection == "heterotrophic-dominated unconnected" &
set == sets$heterotrophic_dominated_set_n)) %>%
pull(system_nr) # Extract system numbers from the filtered data
# Filter the data for analysis based on connection status and unconnected systems
set_data_for_analysis[[i]] = data_for_analysis %>%
filter(connection == "connected" | system_nr %in% unconnected_system_nrs)
# Fit a generalized linear mixed model with a Tweedie distribution
# This is a try. I changed the lmer model with a tweedie glmmtmb.
full_model[[i]] = glmmTMB::glmmTMB(get(response_variable_selected) ~
metaecosystem_type * connection +
(1 | day) +
(1 | baseline),
data = set_data_for_analysis[[i]],
REML = FALSE,
family = glmmTMB::tweedie(link = "log"))
# Extract and store the coefficients from the model summary
coefficients[[i]] = summary(full_model[[i]])[["coefficients"]]
}
# Save the results
save(bootstrap_iterations,
file = here("..",
"3_results",
"mixed_effect_models",
"metaeco_analysis_n_of_bootstrap_iterations.RData"))
save(random_nrs,
file = here("..",
"3_results",
"mixed_effect_models",
"metaeco_analysis_bootstrapped_sets_of_sets.RData"))
save(coefficients,
file = here("..",
"3_results",
"mixed_effect_models",
"metaeco_analysis_bootstrapped_model_coefficients.RData"))
# --- LOAD RESULTS --- #
load(file = here("..",
"3_results",
"mixed_effect_models",
"metaeco_analysis_n_of_bootstrap_iterations.RData"))
load(file = here("..",
"3_results",
"mixed_effect_models",
"metaeco_analysis_bootstrapped_sets_of_sets.RData"))
load(file = here("..",
"3_results",
"mixed_effect_models",
"metaeco_analysis_bootstrapped_model_coefficients.RData"))
# --- SHOW MODEL RESIDUALS --- #
create.res.vs.fit.metaecos(full_model[[1]],
set_data_for_analysis[[i]])
qqnorm(resid(full_model[[1]]))
qqline(resid(full_model[[1]]))
# --- SHOW MODEL SUMMARY --- #
print(summary(full_model[[1]]), digits = 3)
## Family: tweedie ( log )
## Formula:
## get(response_variable_selected) ~ metaecosystem_type * connection +
## (1 | day) + (1 | baseline)
## Data: set_data_for_analysis[[i]]
##
## AIC BIC logLik deviance df.resid
## 1025.4 1050.4 -502.7 1005.4 80
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## day (Intercept) 0.04295 0.2073
## baseline (Intercept) 0.00214 0.0462
## Number of obs: 90, groups: day, 3; baseline, 30
##
## Dispersion parameter for tweedie family (): 3.82
##
## Conditional model:
## Estimate
## (Intercept) 5.8201
## metaecosystem_typeequally-dominated -0.2318
## metaecosystem_typeheterotrophic-dominated -0.4888
## connectionconnected 0.2179
## metaecosystem_typeequally-dominated:connectionconnected -0.2445
## metaecosystem_typeheterotrophic-dominated:connectionconnected -0.6068
## Std. Error
## (Intercept) 0.1332
## metaecosystem_typeequally-dominated 0.0867
## metaecosystem_typeheterotrophic-dominated 0.0910
## connectionconnected 0.0800
## metaecosystem_typeequally-dominated:connectionconnected 0.1215
## metaecosystem_typeheterotrophic-dominated:connectionconnected 0.1319
## z value Pr(>|z|)
## (Intercept) 43.7 < 2e-16
## metaecosystem_typeequally-dominated -2.7 0.0075
## metaecosystem_typeheterotrophic-dominated -5.4 7.8e-08
## connectionconnected 2.7 0.0065
## metaecosystem_typeequally-dominated:connectionconnected -2.0 0.0442
## metaecosystem_typeheterotrophic-dominated:connectionconnected -4.6 4.2e-06
##
## (Intercept) ***
## metaecosystem_typeequally-dominated **
## metaecosystem_typeheterotrophic-dominated ***
## connectionconnected **
## metaecosystem_typeequally-dominated:connectionconnected *
## metaecosystem_typeheterotrophic-dominated:connectionconnected ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- SHOW MODEL ANOVA --- #
anova_output = car::Anova(full_model[[1]], type = "III")
# --- GET MODEL CONSTRASTS --- #
emmeans_output = emmeans(full_model[[1]],
specs = pairwise ~ metaecosystem_type:connection,
adjust = "tukey",
bias.adj = TRUE,
lmer.df = "satterthwaite")
emmeans_output
## $emmeans
## metaecosystem_type connection emmean SE df asymp.LCL asymp.UCL
## autotrophic-dominated unconnected 5.82 0.133 Inf 5.56 6.08
## equally-dominated unconnected 5.59 0.136 Inf 5.32 5.85
## heterotrophic-dominated unconnected 5.33 0.138 Inf 5.06 5.60
## autotrophic-dominated connected 6.04 0.132 Inf 5.78 6.30
## equally-dominated connected 5.56 0.136 Inf 5.30 5.83
## heterotrophic-dominated connected 4.94 0.144 Inf 4.66 5.22
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast
## (autotrophic-dominated unconnected) - (equally-dominated unconnected)
## (autotrophic-dominated unconnected) - (heterotrophic-dominated unconnected)
## (autotrophic-dominated unconnected) - (autotrophic-dominated connected)
## (autotrophic-dominated unconnected) - (equally-dominated connected)
## (autotrophic-dominated unconnected) - (heterotrophic-dominated connected)
## (equally-dominated unconnected) - (heterotrophic-dominated unconnected)
## (equally-dominated unconnected) - (autotrophic-dominated connected)
## (equally-dominated unconnected) - (equally-dominated connected)
## (equally-dominated unconnected) - (heterotrophic-dominated connected)
## (heterotrophic-dominated unconnected) - (autotrophic-dominated connected)
## (heterotrophic-dominated unconnected) - (equally-dominated connected)
## (heterotrophic-dominated unconnected) - (heterotrophic-dominated connected)
## (autotrophic-dominated connected) - (equally-dominated connected)
## (autotrophic-dominated connected) - (heterotrophic-dominated connected)
## (equally-dominated connected) - (heterotrophic-dominated connected)
## estimate SE df z.ratio p.value
## 0.2318 0.0867 Inf 2.672 0.0808
## 0.4888 0.0910 Inf 5.372 <.0001
## -0.2179 0.0800 Inf -2.724 0.0706
## 0.2584 0.0863 Inf 2.993 0.0329
## 0.8777 0.0988 Inf 8.880 <.0001
## 0.2570 0.0937 Inf 2.744 0.0670
## -0.4497 0.0833 Inf -5.397 <.0001
## 0.0266 0.0905 Inf 0.294 0.9997
## 0.6459 0.1010 Inf 6.391 <.0001
## -0.7067 0.0880 Inf -8.035 <.0001
## -0.2304 0.0944 Inf -2.440 0.1427
## 0.3889 0.1050 Inf 3.705 0.0029
## 0.4764 0.0840 Inf 5.670 <.0001
## 1.0956 0.0959 Inf 11.424 <.0001
## 0.6193 0.1020 Inf 6.064 <.0001
##
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 6 estimates
# Give a name for each level which you want to compare
AD_unconnected = c(1, 0, 0, 0, 0, 0)
ED_unconnected = c(0, 1, 0, 0, 0, 0)
HD_unconnected = c(0, 0, 1, 0, 0, 0)
AD_connected = c(0, 0, 0, 1, 0, 0)
ED_connected = c(0, 0, 0, 0, 1, 0)
HD_connected = c(0, 0, 0, 0, 0, 1)
# Compute the contrasts
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("auto-dom connection effect" = AD_connected - AD_unconnected,
"equally-dom connection effect" = ED_connected - ED_unconnected,
"hetero-dom connection effect" = HD_connected - HD_unconnected,
"auto- vs equally-dom connected" = AD_connected - ED_connected,
"hetero- vs equally-dom connected" = HD_connected - ED_connected,
"auto- vs equally-dom unconnected" = AD_unconnected - ED_unconnected,
"hetero- vs equally-dom unconnected" = HD_unconnected - ED_unconnected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
z.ratio = round(z.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- SHOW MODEL RESULTS FOR A SINGLE COMBINATIONS --- #
anova_output
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: get(response_variable_selected)
## Chisq Df Pr(>Chisq)
## (Intercept) 1907.8159 1 < 2.2e-16 ***
## metaecosystem_type 28.9600 2 5.145e-07 ***
## connection 7.4198 1 0.006451 **
## metaecosystem_type:connection 21.2064 2 2.484e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
contrasts
## contrast estimate SE df z.ratio p.value
## 1 auto-dom connection effect 0.218 0.080 Inf 2.724 0.006 **
## 2 equally-dom connection effect -0.027 0.090 Inf -0.294 0.769
## 3 hetero-dom connection effect -0.389 0.105 Inf -3.705 0.000 ***
## 4 auto- vs equally-dom connected 0.476 0.084 Inf 5.670 0.000 ***
## 5 hetero- vs equally-dom connected -0.619 0.102 Inf -6.064 0.000 ***
## 6 auto- vs equally-dom unconnected 0.232 0.087 Inf 2.672 0.008 **
## 7 hetero- vs equally-dom unconnected -0.257 0.094 Inf -2.744 0.006 **
# --- SHOW WHICH RANDOM SETS OF SETS HAVE BOOTSTRAPPED --- #
# Show to make sure that there's no bias in which ecosystem combinations have
# been combined.
hist(random_nrs,
main = "Random sets of sets used for bootrapping")
# --- CALCULATE THE CONTRASTS FOR ALL ECOSYSTEM COMBINATIONS --- #
# Initialise lists
ANOVA_output = list()
emmeans_output = list()
contrasts = list()
# Loop through the sets of sets to calculate their contrasts
for (i in 1:length(random_nrs)){
# Perform ANOVA
ANOVA_output[[i]] = car::Anova(full_model[[i]]) %>%
as.data.frame() %>%
mutate(variable = rownames(.))
colnames(ANOVA_output[[i]]) = c("Chisq",
"Df",
"p",
"variable")
# Compute estimated marginal means (EMMeans) for the interaction of
# metaecosystem type and connection
emmeans_output[[i]] = emmeans(full_model[[i]],
specs = pairwise ~ metaecosystem_type:connection,
adjust = "tukey",
bias.adj = TRUE,
lmer.df = "satterthwaite")
AD_unconnected = c(1, 0, 0, 0, 0, 0)
ED_unconnected = c(0, 1, 0, 0, 0, 0)
HD_unconnected = c(0, 0, 1, 0, 0, 0)
AD_connected = c(0, 0, 0, 1, 0, 0)
ED_connected = c(0, 0, 0, 0, 1, 0)
HD_connected = c(0, 0, 0, 0, 0, 1)
contrasts[[i]] = contrast(emmeans_output[[i]],
method = list("auto-dom connection effect" = AD_connected - AD_unconnected,
"equally-dom connection effect" = ED_connected - ED_unconnected,
"hetero-dom connection effect" = HD_connected - HD_unconnected,
"auto- vs equally-dom connected" = AD_connected - ED_connected,
"heterotrophic- vs equally-dom connected" = HD_connected - ED_connected,
"auto- vs equally-dom unconnected" = AD_unconnected - ED_unconnected,
"heterotrophic- vs equally-dom unconnected" = HD_unconnected - ED_unconnected)) %>%
as.data.frame()
contrast_levels = contrasts[[i]]$contrast
}
# Average ANOVA across sets of sets
ANOVA = do.call(rbind, ANOVA_output) %>%
group_by(variable) %>%
summarise(Chisq = round(mean(Chisq), digits = 1),
Df = round(mean(Df), digits = 1),
p = round(mean(p), digits = 4)) %>%
mutate(variable = factor(variable,
levels = c("metaecosystem_type",
"connection",
"metaecosystem_type:connection"))) %>%
arrange(variable)
# Average the contrasts across sets of sets
contrasts = do.call(rbind, contrasts) %>%
group_by(contrast) %>%
summarise(estimate = round(mean(estimate), digits = 1),
SE = round(mean(SE), digits = 1),
df = round(mean(df), digits = 0),
z.ratio = round(mean(z.ratio), digits = 2),
p.value = round(mean(p.value), digits = 4)) %>%
mutate(contrast = factor(contrast,
levels = c("auto-dom connection effect",
"equally-dom connection effect",
"hetero-dom connection effect",
"auto- vs equally-dom connected",
"hetero- vs equally-dom connected",
"auto- vs equally-dom unconnected",
"hetero- vs equally-dom unconnected"))) %>%
arrange(contrast)
# --- SHOW MODEL RESULTS FOR ALL COMBINATIONS --- #
ANOVA
## # A tibble: 3 × 4
## variable Chisq Df p
## <fct> <dbl> <dbl> <dbl>
## 1 metaecosystem_type 141. 2 0
## 2 connection 0.1 1 0.798
## 3 metaecosystem_type:connection 21.3 2 0
contrasts
## # A tibble: 7 × 6
## contrast estimate SE df z.ratio p.value
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto-dom connection effect 0.2 0.1 Inf 2.71 0.0069
## 2 equally-dom connection effect 0 0.1 Inf -0.29 0.775
## 3 hetero-dom connection effect -0.4 0.1 Inf -3.73 0.0002
## 4 auto- vs equally-dom connected 0.5 0.1 Inf 5.65 0
## 5 auto- vs equally-dom unconnected 0.2 0.1 Inf 2.68 0.0075
## 6 <NA> -0.6 0.1 Inf -6.11 0
## 7 <NA> -0.3 0.1 Inf -2.74 0.0061
trophy_selected = "autotrophic"
response_variable_selected = "bioarea_mm2_per_ml"
distribution_selected = "tweedie"
# --- PLOT ORIGINAL DATA WITH MEANS --- #
plot = plot.ecosystems.points(ds_ecosystems %>%
filter(trophic_type == trophy_selected),
response_variable_selected,
legend_row_n_input = 3)
plot
# --- PLOT ORIGINAL DATA WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(ds_ecosystems,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- PREPARE DATA FOR ANALYSIS --- #
# Add baselines
baselines = ds_ecosystems %>%
filter(time_point == time_point_of_baselines) %>%
select(ecosystem_ID,
all_of(response_variable_selected)) %>%
rename(baseline = all_of(response_variable_selected))
data_for_analysis = ds_ecosystems %>%
left_join(baselines)
# Filter data
data_for_analysis = data_for_analysis %>%
filter(trophic_type == trophy_selected,
time_point %in% time_points_model,
!is.na(!!sym(response_variable_selected))) %>%
mutate(ecosystem_ID = as.character(ecosystem_ID),
size = NA,
size = case_when(ecosystem_size == "small" ~ "S",
ecosystem_size == "medium" ~ "M",
ecosystem_size == "large" ~ "L",
TRUE ~ size),
size = factor(size, levels = c("S", "M", "L")),
size = as.character(size))
# --- PLOT DATA FOR ANALYSIS WITH MEANS --- #
plot.ecosystems.points(data_for_analysis,
response_variable_selected,
legend_row_n_input = 3)
# --- PLOT DATA FOR ANALYSIS WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(data_for_analysis,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- DEFINE FORMULA --- #
formula = paste("get(response_variable_selected) ~",
"size * connection +",
"(1 | baseline) +",
"(1 | day)") %>%
print()
## [1] "get(response_variable_selected) ~ size * connection + (1 | baseline) + (1 | day)"
formula = as.formula(formula)
# --- CREATE MODEL (WITH TWEEDIE DISTRIBUTION) --- #
if(trophy_selected != "heterotrophic" && response_variable_selected != "median_body_size_µm2"){
model <- glmmTMB::glmmTMB(formula,
data = data_for_analysis,
family = glmmTMB::tweedie(link = "log"))
}
if(trophy_selected == "heterotrophic" && response_variable_selected == "median_body_size_µm2"){
model <- glmmTMB(formula,
data = data_for_analysis,
family = glmmTMB::tweedie(link = "log"),
control = glmmTMBControl(optimizer = optim,
optArgs = list(method = "BFGS")))
}
# --- PLOT RESIDUALS --- #
qqnorm(resid(model))
qqline(resid(model))
create.res.vs.fit.ecosystems(model,
data_for_analysis)
# --- SHOW MODEL SUMMARY --- #
print(summary(model), digits = 3)
## Family: tweedie ( log )
## Formula:
## get(response_variable_selected) ~ size * connection + (1 | baseline) +
## (1 | day)
## Data: data_for_analysis
##
## AIC BIC logLik deviance df.resid
## 422.6 447.6 -201.3 402.6 80
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## baseline (Intercept) 1.23e-10 1.11e-05
## day (Intercept) 1.04e-01 3.23e-01
## Number of obs: 90, groups: baseline, 30; day, 3
##
## Dispersion parameter for tweedie family (): 0.556
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.1055 0.2072 10.16 <2e-16 ***
## sizeM -0.0378 0.1277 -0.30 0.767
## sizeS -1.5262 0.1805 -8.46 <2e-16 ***
## connectionconnected 0.2617 0.1212 2.16 0.031 *
## sizeM:connectionconnected -0.3362 0.1782 -1.89 0.059 .
## sizeS:connectionconnected 0.2180 0.2357 0.92 0.355
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- PERFORM ANOVA --- #
ANOVA = car::Anova(model, type = "III") %>%
print()
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: get(response_variable_selected)
## Chisq Df Pr(>Chisq)
## (Intercept) 103.2826 1 < 2e-16 ***
## size 79.8900 2 < 2e-16 ***
## connection 4.6614 1 0.03085 *
## size:connection 6.4376 2 0.04000 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- ESTIMATE MARGINAL MEANS --- #
emmeans_output = emmeans(model,
specs = pairwise ~ size:connection,
adjust = "tukey",
bias.adj = TRUE,
lmer.df = "satterthwaite")
# --- CODE THE CONTRAST LEVELS --- #
emmeans_output
## $emmeans
## size connection emmean SE df asymp.LCL asymp.UCL
## L unconnected 2.106 0.207 Inf 1.699 2.51
## M unconnected 2.068 0.208 Inf 1.660 2.48
## S unconnected 0.579 0.244 Inf 0.101 1.06
## L connected 2.367 0.204 Inf 1.968 2.77
## M connected 1.993 0.209 Inf 1.584 2.40
## S connected 1.059 0.229 Inf 0.611 1.51
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## L unconnected - M unconnected 0.0378 0.128 Inf 0.296 0.9997
## L unconnected - S unconnected 1.5262 0.180 Inf 8.456 <.0001
## L unconnected - L connected -0.2617 0.121 Inf -2.159 0.2572
## L unconnected - M connected 0.1123 0.129 Inf 0.868 0.9541
## L unconnected - S connected 1.0466 0.159 Inf 6.575 <.0001
## M unconnected - S unconnected 1.4884 0.180 Inf 8.254 <.0001
## M unconnected - L connected -0.2994 0.122 Inf -2.451 0.1391
## M unconnected - M connected 0.0745 0.131 Inf 0.571 0.9929
## M unconnected - S connected 1.0088 0.159 Inf 6.334 <.0001
## S unconnected - L connected -1.7879 0.176 Inf -10.130 <.0001
## S unconnected - M connected -1.4139 0.183 Inf -7.725 <.0001
## S unconnected - S connected -0.4796 0.202 Inf -2.373 0.1658
## L connected - M connected 0.3740 0.124 Inf 3.015 0.0309
## L connected - S connected 1.3082 0.155 Inf 8.457 <.0001
## M connected - S connected 0.9343 0.162 Inf 5.771 <.0001
##
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 6 estimates
L_unconnected = c(1, 0, 0, 0, 0, 0)
M_unconnected = c(0, 1, 0, 0, 0, 0)
S_unconnected = c(0, 0, 1, 0, 0, 0)
L_connected = c(0, 0, 0, 1, 0, 0)
M_connected = c(0, 0, 0, 0, 1, 0)
S_connected = c(0, 0, 0, 0, 0, 1)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (T-RATIO, USED FOR RESPONSE VARIABLES WITH NORMAL DISTRIBUTION)--- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
t.ratio = round(t.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (Z RATIO, USED FOR REPONSE VARIABLES WITH TWEEDIE DISRIBUTION) --- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
z.ratio = round(z.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- SHOW RESULTS --- #
ANOVA
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: get(response_variable_selected)
## Chisq Df Pr(>Chisq)
## (Intercept) 103.2826 1 < 2e-16 ***
## size 79.8900 2 < 2e-16 ***
## connection 4.6614 1 0.03085 *
## size:connection 6.4376 2 0.04000 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
contrasts
## contrast estimate SE df z.ratio p.value
## 1 small connection effect 0.480 0.202 Inf 2.373 0.018 *
## 2 medium connection effect -0.075 0.131 Inf -0.571 0.568
## 3 large connection effect 0.262 0.121 Inf 2.159 0.031 *
## 4 Size effect in unconnected (L - S) 1.526 0.180 Inf 8.456 0.000 ***
## 5 Size effect in unconnected (M - S) 1.488 0.180 Inf 8.254 0.000 ***
## 6 Size effect in connected (L - S) 1.308 0.155 Inf 8.457 0.000 ***
## 7 Size effect in connected (M - S) 0.934 0.162 Inf 5.771 0.000 ***
response_variable_selected = "median_body_size_µm2"
distribution_selected = "tweedie"
# --- PLOT ORIGINAL DATA WITH MEANS --- #
plot = plot.ecosystems.points(ds_ecosystems %>%
filter(trophic_type == trophy_selected),
response_variable_selected,
legend_row_n_input = 3)
plot
# --- PLOT ORIGINAL DATA WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(ds_ecosystems,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- PREPARE DATA FOR ANALYSIS --- #
# Add baselines
baselines = ds_ecosystems %>%
filter(time_point == time_point_of_baselines) %>%
select(ecosystem_ID,
all_of(response_variable_selected)) %>%
rename(baseline = all_of(response_variable_selected))
data_for_analysis = ds_ecosystems %>%
left_join(baselines)
# Filter data
data_for_analysis = data_for_analysis %>%
filter(trophic_type == trophy_selected,
time_point %in% time_points_model,
!is.na(!!sym(response_variable_selected))) %>%
mutate(ecosystem_ID = as.character(ecosystem_ID),
size = NA,
size = case_when(ecosystem_size == "small" ~ "S",
ecosystem_size == "medium" ~ "M",
ecosystem_size == "large" ~ "L",
TRUE ~ size),
size = factor(size, levels = c("S", "M", "L")),
size = as.character(size))
# --- PLOT DATA FOR ANALYSIS WITH MEANS --- #
plot.ecosystems.points(data_for_analysis,
response_variable_selected,
legend_row_n_input = 3)
# --- PLOT DATA FOR ANALYSIS WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(data_for_analysis,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- DEFINE FORMULA --- #
formula = paste("get(response_variable_selected) ~",
"size * connection +",
"(1 | baseline) +",
"(1 | day)") %>%
print()
## [1] "get(response_variable_selected) ~ size * connection + (1 | baseline) + (1 | day)"
formula = as.formula(formula)
# --- CREATE MODEL (WITH TWEEDIE DISTRIBUTION) --- #
if(trophy_selected != "heterotrophic" && response_variable_selected != "median_body_size_µm2"){
model <- glmmTMB::glmmTMB(formula,
data = data_for_analysis,
family = glmmTMB::tweedie(link = "log"))
}
if(trophy_selected == "heterotrophic" && response_variable_selected == "median_body_size_µm2"){
model <- glmmTMB(formula,
data = data_for_analysis,
family = glmmTMB::tweedie(link = "log"),
control = glmmTMBControl(optimizer = optim,
optArgs = list(method = "BFGS")))
}
# --- PLOT RESIDUALS --- #
qqnorm(resid(model))
qqline(resid(model))
create.res.vs.fit.ecosystems(model,
data_for_analysis)
# --- SHOW MODEL SUMMARY --- #
print(summary(model), digits = 3)
## Family: tweedie ( log )
## Formula:
## get(response_variable_selected) ~ size * connection + (1 | baseline) +
## (1 | day)
## Data: data_for_analysis
##
## AIC BIC logLik deviance df.resid
## 422.6 447.6 -201.3 402.6 80
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## baseline (Intercept) 1.23e-10 1.11e-05
## day (Intercept) 1.04e-01 3.23e-01
## Number of obs: 90, groups: baseline, 30; day, 3
##
## Dispersion parameter for tweedie family (): 0.556
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.1055 0.2072 10.16 <2e-16 ***
## sizeM -0.0378 0.1277 -0.30 0.767
## sizeS -1.5262 0.1805 -8.46 <2e-16 ***
## connectionconnected 0.2617 0.1212 2.16 0.031 *
## sizeM:connectionconnected -0.3362 0.1782 -1.89 0.059 .
## sizeS:connectionconnected 0.2180 0.2357 0.92 0.355
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- PERFORM ANOVA --- #
ANOVA = car::Anova(model, type = "III") %>%
print()
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: get(response_variable_selected)
## Chisq Df Pr(>Chisq)
## (Intercept) 103.2826 1 < 2e-16 ***
## size 79.8900 2 < 2e-16 ***
## connection 4.6614 1 0.03085 *
## size:connection 6.4376 2 0.04000 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- ESTIMATE MARGINAL MEANS --- #
emmeans_output = emmeans(model,
specs = pairwise ~ size:connection,
adjust = "tukey",
bias.adj = TRUE,
lmer.df = "satterthwaite")
# --- CODE THE CONTRAST LEVELS --- #
emmeans_output
## $emmeans
## size connection emmean SE df asymp.LCL asymp.UCL
## L unconnected 2.106 0.207 Inf 1.699 2.51
## M unconnected 2.068 0.208 Inf 1.660 2.48
## S unconnected 0.579 0.244 Inf 0.101 1.06
## L connected 2.367 0.204 Inf 1.968 2.77
## M connected 1.993 0.209 Inf 1.584 2.40
## S connected 1.059 0.229 Inf 0.611 1.51
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## L unconnected - M unconnected 0.0378 0.128 Inf 0.296 0.9997
## L unconnected - S unconnected 1.5262 0.180 Inf 8.456 <.0001
## L unconnected - L connected -0.2617 0.121 Inf -2.159 0.2572
## L unconnected - M connected 0.1123 0.129 Inf 0.868 0.9541
## L unconnected - S connected 1.0466 0.159 Inf 6.575 <.0001
## M unconnected - S unconnected 1.4884 0.180 Inf 8.254 <.0001
## M unconnected - L connected -0.2994 0.122 Inf -2.451 0.1391
## M unconnected - M connected 0.0745 0.131 Inf 0.571 0.9929
## M unconnected - S connected 1.0088 0.159 Inf 6.334 <.0001
## S unconnected - L connected -1.7879 0.176 Inf -10.130 <.0001
## S unconnected - M connected -1.4139 0.183 Inf -7.725 <.0001
## S unconnected - S connected -0.4796 0.202 Inf -2.373 0.1658
## L connected - M connected 0.3740 0.124 Inf 3.015 0.0309
## L connected - S connected 1.3082 0.155 Inf 8.457 <.0001
## M connected - S connected 0.9343 0.162 Inf 5.771 <.0001
##
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 6 estimates
L_unconnected = c(1, 0, 0, 0, 0, 0)
M_unconnected = c(0, 1, 0, 0, 0, 0)
S_unconnected = c(0, 0, 1, 0, 0, 0)
L_connected = c(0, 0, 0, 1, 0, 0)
M_connected = c(0, 0, 0, 0, 1, 0)
S_connected = c(0, 0, 0, 0, 0, 1)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (T-RATIO, USED FOR RESPONSE VARIABLES WITH NORMAL DISTRIBUTION)--- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
t.ratio = round(t.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (Z RATIO, USED FOR REPONSE VARIABLES WITH TWEEDIE DISRIBUTION) --- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
z.ratio = round(z.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- SHOW RESULTS --- #
ANOVA
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: get(response_variable_selected)
## Chisq Df Pr(>Chisq)
## (Intercept) 103.2826 1 < 2e-16 ***
## size 79.8900 2 < 2e-16 ***
## connection 4.6614 1 0.03085 *
## size:connection 6.4376 2 0.04000 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
contrasts
## contrast estimate SE df z.ratio p.value
## 1 small connection effect 0.480 0.202 Inf 2.373 0.018 *
## 2 medium connection effect -0.075 0.131 Inf -0.571 0.568
## 3 large connection effect 0.262 0.121 Inf 2.159 0.031 *
## 4 Size effect in unconnected (L - S) 1.526 0.180 Inf 8.456 0.000 ***
## 5 Size effect in unconnected (M - S) 1.488 0.180 Inf 8.254 0.000 ***
## 6 Size effect in connected (L - S) 1.308 0.155 Inf 8.457 0.000 ***
## 7 Size effect in connected (M - S) 0.934 0.162 Inf 5.771 0.000 ***
# --- SAVE PLOT --- #
# Generate the image where to save the plot
pdf(file = here("..",
"3_results",
"figures",
"paper",
"ecosystems_median_body_size_auto.pdf"),
width = paper_width_inches,
height = paper_height_inches)
# Save image to the file
plot
# Close the current graphics device to properly save the image
dev.off()
ecosystem_type_i = c("Small heterotrophic unconnected",
"Medium heterotrophic unconnected",
"Large heterotrophic unconnected",
"Small heterotrophic connected",
"Medium heterotrophic connected",
"Large heterotrophic connected")
ecosystem_size_and_trophy_selected = c("Small heterotrophic",
"Medium heterotrophic",
"Large heterotrophic")
trophy_selected = "heterotrophic"
response_variable_selected = "bioarea_mm2_per_ml"
distribution_selected = "tweedie"
# --- PLOT ORIGINAL DATA WITH MEANS --- #
plot = plot.ecosystems.points(ds_ecosystems %>%
filter(trophic_type == trophy_selected),
response_variable_selected,
legend_row_n_input = 3)
plot
# --- PLOT ORIGINAL DATA WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(ds_ecosystems,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- PREPARE DATA FOR ANALYSIS --- #
# Add baselines
baselines = ds_ecosystems %>%
filter(time_point == time_point_of_baselines) %>%
select(ecosystem_ID,
all_of(response_variable_selected)) %>%
rename(baseline = all_of(response_variable_selected))
data_for_analysis = ds_ecosystems %>%
left_join(baselines)
# Filter data
data_for_analysis = data_for_analysis %>%
filter(trophic_type == trophy_selected,
time_point %in% time_points_model,
!is.na(!!sym(response_variable_selected))) %>%
mutate(ecosystem_ID = as.character(ecosystem_ID),
size = NA,
size = case_when(ecosystem_size == "small" ~ "S",
ecosystem_size == "medium" ~ "M",
ecosystem_size == "large" ~ "L",
TRUE ~ size),
size = factor(size, levels = c("S", "M", "L")),
size = as.character(size))
# --- PLOT DATA FOR ANALYSIS WITH MEANS --- #
plot.ecosystems.points(data_for_analysis,
response_variable_selected,
legend_row_n_input = 3)
# --- PLOT DATA FOR ANALYSIS WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(data_for_analysis,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- DEFINE FORMULA --- #
formula = paste("get(response_variable_selected) ~",
"size * connection +",
"(1 | baseline) +",
"(1 | day)") %>%
print()
## [1] "get(response_variable_selected) ~ size * connection + (1 | baseline) + (1 | day)"
formula = as.formula(formula)
# --- CREATE MODEL (WITH TWEEDIE DISTRIBUTION) --- #
if(trophy_selected != "heterotrophic" && response_variable_selected != "median_body_size_µm2"){
model <- glmmTMB::glmmTMB(formula,
data = data_for_analysis,
family = glmmTMB::tweedie(link = "log"))
}
if(trophy_selected == "heterotrophic" && response_variable_selected == "median_body_size_µm2"){
model <- glmmTMB(formula,
data = data_for_analysis,
family = glmmTMB::tweedie(link = "log"),
control = glmmTMBControl(optimizer = optim,
optArgs = list(method = "BFGS")))
}
# --- PLOT RESIDUALS --- #
qqnorm(resid(model))
qqline(resid(model))
create.res.vs.fit.ecosystems(model,
data_for_analysis)
# --- SHOW MODEL SUMMARY --- #
print(summary(model), digits = 3)
## Family: tweedie ( log )
## Formula:
## get(response_variable_selected) ~ size * connection + (1 | baseline) +
## (1 | day)
## Data: data_for_analysis
##
## AIC BIC logLik deviance df.resid
## 422.6 447.6 -201.3 402.6 80
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## baseline (Intercept) 1.23e-10 1.11e-05
## day (Intercept) 1.04e-01 3.23e-01
## Number of obs: 90, groups: baseline, 30; day, 3
##
## Dispersion parameter for tweedie family (): 0.556
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.1055 0.2072 10.16 <2e-16 ***
## sizeM -0.0378 0.1277 -0.30 0.767
## sizeS -1.5262 0.1805 -8.46 <2e-16 ***
## connectionconnected 0.2617 0.1212 2.16 0.031 *
## sizeM:connectionconnected -0.3362 0.1782 -1.89 0.059 .
## sizeS:connectionconnected 0.2180 0.2357 0.92 0.355
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- PERFORM ANOVA --- #
ANOVA = car::Anova(model, type = "III") %>%
print()
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: get(response_variable_selected)
## Chisq Df Pr(>Chisq)
## (Intercept) 103.2826 1 < 2e-16 ***
## size 79.8900 2 < 2e-16 ***
## connection 4.6614 1 0.03085 *
## size:connection 6.4376 2 0.04000 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- ESTIMATE MARGINAL MEANS --- #
emmeans_output = emmeans(model,
specs = pairwise ~ size:connection,
adjust = "tukey",
bias.adj = TRUE,
lmer.df = "satterthwaite")
# --- CODE THE CONTRAST LEVELS --- #
emmeans_output
## $emmeans
## size connection emmean SE df asymp.LCL asymp.UCL
## L unconnected 2.106 0.207 Inf 1.699 2.51
## M unconnected 2.068 0.208 Inf 1.660 2.48
## S unconnected 0.579 0.244 Inf 0.101 1.06
## L connected 2.367 0.204 Inf 1.968 2.77
## M connected 1.993 0.209 Inf 1.584 2.40
## S connected 1.059 0.229 Inf 0.611 1.51
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## L unconnected - M unconnected 0.0378 0.128 Inf 0.296 0.9997
## L unconnected - S unconnected 1.5262 0.180 Inf 8.456 <.0001
## L unconnected - L connected -0.2617 0.121 Inf -2.159 0.2572
## L unconnected - M connected 0.1123 0.129 Inf 0.868 0.9541
## L unconnected - S connected 1.0466 0.159 Inf 6.575 <.0001
## M unconnected - S unconnected 1.4884 0.180 Inf 8.254 <.0001
## M unconnected - L connected -0.2994 0.122 Inf -2.451 0.1391
## M unconnected - M connected 0.0745 0.131 Inf 0.571 0.9929
## M unconnected - S connected 1.0088 0.159 Inf 6.334 <.0001
## S unconnected - L connected -1.7879 0.176 Inf -10.130 <.0001
## S unconnected - M connected -1.4139 0.183 Inf -7.725 <.0001
## S unconnected - S connected -0.4796 0.202 Inf -2.373 0.1658
## L connected - M connected 0.3740 0.124 Inf 3.015 0.0309
## L connected - S connected 1.3082 0.155 Inf 8.457 <.0001
## M connected - S connected 0.9343 0.162 Inf 5.771 <.0001
##
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 6 estimates
L_unconnected = c(1, 0, 0, 0, 0, 0)
M_unconnected = c(0, 1, 0, 0, 0, 0)
S_unconnected = c(0, 0, 1, 0, 0, 0)
L_connected = c(0, 0, 0, 1, 0, 0)
M_connected = c(0, 0, 0, 0, 1, 0)
S_connected = c(0, 0, 0, 0, 0, 1)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (T-RATIO, USED FOR RESPONSE VARIABLES WITH NORMAL DISTRIBUTION)--- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
t.ratio = round(t.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (Z RATIO, USED FOR REPONSE VARIABLES WITH TWEEDIE DISRIBUTION) --- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
z.ratio = round(z.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- SHOW RESULTS --- #
ANOVA
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: get(response_variable_selected)
## Chisq Df Pr(>Chisq)
## (Intercept) 103.2826 1 < 2e-16 ***
## size 79.8900 2 < 2e-16 ***
## connection 4.6614 1 0.03085 *
## size:connection 6.4376 2 0.04000 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
contrasts
## contrast estimate SE df z.ratio p.value
## 1 small connection effect 0.480 0.202 Inf 2.373 0.018 *
## 2 medium connection effect -0.075 0.131 Inf -0.571 0.568
## 3 large connection effect 0.262 0.121 Inf 2.159 0.031 *
## 4 Size effect in unconnected (L - S) 1.526 0.180 Inf 8.456 0.000 ***
## 5 Size effect in unconnected (M - S) 1.488 0.180 Inf 8.254 0.000 ***
## 6 Size effect in connected (L - S) 1.308 0.155 Inf 8.457 0.000 ***
## 7 Size effect in connected (M - S) 0.934 0.162 Inf 5.771 0.000 ***
response_variable_selected = "species_richness"
distribution_selected = "gaussian"
# --- PLOT ORIGINAL DATA WITH MEANS --- #
plot = plot.ecosystems.points(ds_ecosystems %>%
filter(trophic_type == trophy_selected),
response_variable_selected,
legend_row_n_input = 3)
plot
# --- PLOT ORIGINAL DATA WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(ds_ecosystems,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- PREPARE DATA FOR ANALYSIS --- #
# Add baselines
baselines = ds_ecosystems %>%
filter(time_point == time_point_of_baselines) %>%
select(ecosystem_ID,
all_of(response_variable_selected)) %>%
rename(baseline = all_of(response_variable_selected))
data_for_analysis = ds_ecosystems %>%
left_join(baselines)
# Filter data
data_for_analysis = data_for_analysis %>%
filter(trophic_type == trophy_selected,
time_point %in% time_points_model,
!is.na(!!sym(response_variable_selected))) %>%
mutate(ecosystem_ID = as.character(ecosystem_ID),
size = NA,
size = case_when(ecosystem_size == "small" ~ "S",
ecosystem_size == "medium" ~ "M",
ecosystem_size == "large" ~ "L",
TRUE ~ size),
size = factor(size, levels = c("S", "M", "L")),
size = as.character(size))
# --- PLOT DATA FOR ANALYSIS WITH MEANS --- #
plot.ecosystems.points(data_for_analysis,
response_variable_selected,
legend_row_n_input = 3)
# --- PLOT DATA FOR ANALYSIS WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(data_for_analysis,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- DEFINE FORMULA --- #
formula = paste("get(response_variable_selected) ~",
"size * connection +",
"(1 | baseline) +",
"(1 | day)") %>%
print()
## [1] "get(response_variable_selected) ~ size * connection + (1 | baseline) + (1 | day)"
formula = as.formula(formula)
# --- CREATE MODEL (WITH NORMAL DISTRIBUTION) --- #
model = lmer(formula,
data = data_for_analysis,
REML = FALSE,
control = lmerControl(optimizer = "Nelder_Mead"))
# --- PLOT RESIDUALS --- #
qqnorm(resid(model))
qqline(resid(model))
create.res.vs.fit.ecosystems(model,
data_for_analysis)
# --- SHOW MODEL SUMMARY --- #
print(summary(model), digits = 3)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: formula
## Data: data_for_analysis
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## AIC BIC logLik deviance df.resid
## 313.4 335.9 -147.7 295.4 81
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1410 -0.6293 -0.0135 0.5738 2.4524
##
## Random effects:
## Groups Name Variance Std.Dev.
## baseline (Intercept) 0.000 0.000
## day (Intercept) 0.163 0.403
## Residual 1.486 1.219
## Number of obs: 90, groups: baseline, 6; day, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.733 0.392 13.467 17.20 1.5e-10 ***
## sizeM -0.600 0.445 87.000 -1.35 0.1812
## sizeS -1.467 0.445 87.000 -3.29 0.0014 **
## connectionconnected -0.600 0.445 87.000 -1.35 0.1812
## sizeM:connectionconnected 1.133 0.630 87.000 1.80 0.0753 .
## sizeS:connectionconnected -1.133 0.630 87.000 -1.80 0.0753 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) sizeM sizeS cnnctn szM:cn
## sizeM -0.569
## sizeS -0.569 0.500
## cnnctncnnct -0.569 0.500 0.500
## szM:cnnctnc 0.402 -0.707 -0.354 -0.707
## szS:cnnctnc 0.402 -0.354 -0.707 -0.707 0.500
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# --- PERFORM ANOVA (GAUSSIAN) --- #
ANOVA = anova(model) %>%
print()
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## size 81.356 40.678 2 87 27.3678 6.022e-10 ***
## connection 8.100 8.100 1 87 5.4496 0.021876 *
## size:connection 19.267 9.633 2 87 6.4813 0.002378 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- ESTIMATE MARGINAL MEANS --- #
emmeans_output = emmeans(model,
specs = pairwise ~ size:connection,
adjust = "tukey",
bias.adj = TRUE,
lmer.df = "satterthwaite")
# --- CODE THE CONTRAST LEVELS --- #
emmeans_output
## $emmeans
## size connection emmean SE df lower.CL upper.CL
## L unconnected 6.73 0.392 13.5 5.89 7.58
## M unconnected 6.13 0.392 13.5 5.29 6.98
## S unconnected 5.27 0.392 13.5 4.42 6.11
## L connected 6.13 0.392 13.5 5.29 6.98
## M connected 6.67 0.392 13.5 5.82 7.51
## S connected 3.53 0.392 13.5 2.69 4.38
##
## Degrees-of-freedom method: satterthwaite
## Results are given on the get (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## L unconnected - M unconnected 0.6000 0.445 87 1.348 0.7575
## L unconnected - S unconnected 1.4667 0.445 87 3.295 0.0173
## L unconnected - L connected 0.6000 0.445 87 1.348 0.7575
## L unconnected - M connected 0.0667 0.445 87 0.150 1.0000
## L unconnected - S connected 3.2000 0.445 87 7.188 <.0001
## M unconnected - S unconnected 0.8667 0.445 87 1.947 0.3813
## M unconnected - L connected 0.0000 0.445 87 0.000 1.0000
## M unconnected - M connected -0.5333 0.445 87 -1.198 0.8367
## M unconnected - S connected 2.6000 0.445 87 5.840 <.0001
## S unconnected - L connected -0.8667 0.445 87 -1.947 0.3813
## S unconnected - M connected -1.4000 0.445 87 -3.145 0.0267
## S unconnected - S connected 1.7333 0.445 87 3.894 0.0026
## L connected - M connected -0.5333 0.445 87 -1.198 0.8367
## L connected - S connected 2.6000 0.445 87 5.840 <.0001
## M connected - S connected 3.1333 0.445 87 7.038 <.0001
##
## Note: contrasts are still on the get scale. Consider using
## regrid() if you want contrasts of back-transformed estimates.
## Degrees-of-freedom method: satterthwaite
## P value adjustment: tukey method for comparing a family of 6 estimates
L_unconnected = c(1, 0, 0, 0, 0, 0)
M_unconnected = c(0, 1, 0, 0, 0, 0)
S_unconnected = c(0, 0, 1, 0, 0, 0)
L_connected = c(0, 0, 0, 1, 0, 0)
M_connected = c(0, 0, 0, 0, 1, 0)
S_connected = c(0, 0, 0, 0, 0, 1)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (T-RATIO, USED FOR RESPONSE VARIABLES WITH NORMAL DISTRIBUTION)--- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
t.ratio = round(t.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (Z RATIO, USED FOR REPONSE VARIABLES WITH TWEEDIE DISRIBUTION) --- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
z.ratio = round(z.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- SHOW RESULTS --- #
ANOVA
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## size 81.356 40.678 2 87 27.3678 6.022e-10 ***
## connection 8.100 8.100 1 87 5.4496 0.021876 *
## size:connection 19.267 9.633 2 87 6.4813 0.002378 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
contrasts
## contrast estimate SE df t.ratio p.value
## 1 small connection effect -1.733 0.445 87 -3.894 0.000 ***
## 2 medium connection effect 0.533 0.445 87 1.198 0.234
## 3 large connection effect -0.600 0.445 87 -1.348 0.181
## 4 Size effect in unconnected (L - S) 1.467 0.445 87 3.295 0.001 **
## 5 Size effect in unconnected (M - S) 0.867 0.445 87 1.947 0.055
## 6 Size effect in connected (L - S) 2.600 0.445 87 5.840 0.000 ***
## 7 Size effect in connected (M - S) 3.133 0.445 87 7.038 0.000 ***
response_variable_selected = "shannon"
distribution_selected = "gaussian"
# --- PLOT ORIGINAL DATA WITH MEANS --- #
plot = plot.ecosystems.points(ds_ecosystems %>%
filter(trophic_type == trophy_selected),
response_variable_selected,
legend_row_n_input = 3)
plot
# --- PLOT ORIGINAL DATA WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(ds_ecosystems,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- PREPARE DATA FOR ANALYSIS --- #
# Add baselines
baselines = ds_ecosystems %>%
filter(time_point == time_point_of_baselines) %>%
select(ecosystem_ID,
all_of(response_variable_selected)) %>%
rename(baseline = all_of(response_variable_selected))
data_for_analysis = ds_ecosystems %>%
left_join(baselines)
# Filter data
data_for_analysis = data_for_analysis %>%
filter(trophic_type == trophy_selected,
time_point %in% time_points_model,
!is.na(!!sym(response_variable_selected))) %>%
mutate(ecosystem_ID = as.character(ecosystem_ID),
size = NA,
size = case_when(ecosystem_size == "small" ~ "S",
ecosystem_size == "medium" ~ "M",
ecosystem_size == "large" ~ "L",
TRUE ~ size),
size = factor(size, levels = c("S", "M", "L")),
size = as.character(size))
# --- PLOT DATA FOR ANALYSIS WITH MEANS --- #
plot.ecosystems.points(data_for_analysis,
response_variable_selected,
legend_row_n_input = 3)
# --- PLOT DATA FOR ANALYSIS WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(data_for_analysis,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- DEFINE FORMULA --- #
formula = paste("get(response_variable_selected) ~",
"size * connection +",
"(1 | baseline) +",
"(1 | day)") %>%
print()
## [1] "get(response_variable_selected) ~ size * connection + (1 | baseline) + (1 | day)"
formula = as.formula(formula)
# --- CREATE MODEL (WITH NORMAL DISTRIBUTION) --- #
model = lmer(formula,
data = data_for_analysis,
REML = FALSE,
control = lmerControl(optimizer = "Nelder_Mead"))
# --- PLOT RESIDUALS --- #
qqnorm(resid(model))
qqline(resid(model))
create.res.vs.fit.ecosystems(model,
data_for_analysis)
# --- SHOW MODEL SUMMARY --- #
print(summary(model), digits = 3)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: formula
## Data: data_for_analysis
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## AIC BIC logLik deviance df.resid
## 48.0 70.5 -15.0 30.0 81
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.839 -0.601 0.153 0.573 2.670
##
## Random effects:
## Groups Name Variance Std.Dev.
## baseline (Intercept) 0.0000 0.000
## day (Intercept) 0.0000 0.000
## Residual 0.0817 0.286
## Number of obs: 90, groups: baseline, 30; day, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.2953 0.0738 90.0000 17.55 <2e-16 ***
## sizeM -0.0258 0.1044 90.0000 -0.25 0.806
## sizeS -0.2738 0.1044 90.0000 -2.62 0.010 *
## connectionconnected 0.0643 0.1044 90.0000 0.62 0.539
## sizeM:connectionconnected 0.1473 0.1476 90.0000 1.00 0.321
## sizeS:connectionconnected -0.2743 0.1476 90.0000 -1.86 0.066 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) sizeM sizeS cnnctn szM:cn
## sizeM -0.707
## sizeS -0.707 0.500
## cnnctncnnct -0.707 0.500 0.500
## szM:cnnctnc 0.500 -0.707 -0.354 -0.707
## szS:cnnctnc 0.500 -0.354 -0.707 -0.707 0.500
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# --- PERFORM ANOVA (GAUSSIAN) --- #
ANOVA = anova(model) %>%
print()
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## size 3.8163 1.90815 2 90 23.3480 6.791e-09 ***
## connection 0.0109 0.01092 1 90 0.1336 0.71562
## size:connection 0.6866 0.34329 2 90 4.2005 0.01803 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- ESTIMATE MARGINAL MEANS --- #
emmeans_output = emmeans(model,
specs = pairwise ~ size:connection,
adjust = "tukey",
bias.adj = TRUE,
lmer.df = "satterthwaite")
# --- CODE THE CONTRAST LEVELS --- #
emmeans_output
## $emmeans
## size connection emmean SE df lower.CL upper.CL
## L unconnected 1.295 0.0738 90 1.149 1.442
## M unconnected 1.270 0.0738 90 1.123 1.416
## S unconnected 1.022 0.0738 90 0.875 1.168
## L connected 1.360 0.0738 90 1.213 1.506
## M connected 1.481 0.0738 90 1.335 1.628
## S connected 0.812 0.0738 90 0.665 0.958
##
## Degrees-of-freedom method: satterthwaite
## Results are given on the get (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## L unconnected - M unconnected 0.0258 0.104 90 0.247 0.9999
## L unconnected - S unconnected 0.2738 0.104 90 2.623 0.1023
## L unconnected - L connected -0.0643 0.104 90 -0.616 0.9896
## L unconnected - M connected -0.1859 0.104 90 -1.781 0.4833
## L unconnected - S connected 0.4837 0.104 90 4.634 0.0002
## M unconnected - S unconnected 0.2480 0.104 90 2.376 0.1759
## M unconnected - L connected -0.0901 0.104 90 -0.863 0.9542
## M unconnected - M connected -0.2117 0.104 90 -2.028 0.3352
## M unconnected - S connected 0.4579 0.104 90 4.387 0.0004
## S unconnected - L connected -0.3381 0.104 90 -3.239 0.0202
## S unconnected - M connected -0.4597 0.104 90 -4.403 0.0004
## S unconnected - S connected 0.2099 0.104 90 2.011 0.3444
## L connected - M connected -0.1215 0.104 90 -1.164 0.8524
## L connected - S connected 0.5480 0.104 90 5.250 <.0001
## M connected - S connected 0.6696 0.104 90 6.414 <.0001
##
## Note: contrasts are still on the get scale. Consider using
## regrid() if you want contrasts of back-transformed estimates.
## Degrees-of-freedom method: satterthwaite
## P value adjustment: tukey method for comparing a family of 6 estimates
L_unconnected = c(1, 0, 0, 0, 0, 0)
M_unconnected = c(0, 1, 0, 0, 0, 0)
S_unconnected = c(0, 0, 1, 0, 0, 0)
L_connected = c(0, 0, 0, 1, 0, 0)
M_connected = c(0, 0, 0, 0, 1, 0)
S_connected = c(0, 0, 0, 0, 0, 1)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (T-RATIO, USED FOR RESPONSE VARIABLES WITH NORMAL DISTRIBUTION)--- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
t.ratio = round(t.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (Z RATIO, USED FOR REPONSE VARIABLES WITH TWEEDIE DISRIBUTION) --- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
z.ratio = round(z.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- SHOW RESULTS --- #
ANOVA
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## size 3.8163 1.90815 2 90 23.3480 6.791e-09 ***
## connection 0.0109 0.01092 1 90 0.1336 0.71562
## size:connection 0.6866 0.34329 2 90 4.2005 0.01803 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
contrasts
## contrast estimate SE df t.ratio p.value
## 1 small connection effect -0.210 0.104 90 -2.011 0.047 *
## 2 medium connection effect 0.212 0.104 90 2.028 0.046 *
## 3 large connection effect 0.064 0.104 90 0.616 0.539
## 4 Size effect in unconnected (L - S) 0.274 0.104 90 2.623 0.010 *
## 5 Size effect in unconnected (M - S) 0.248 0.104 90 2.376 0.020 *
## 6 Size effect in connected (L - S) 0.548 0.104 90 5.250 0.000 ***
## 7 Size effect in connected (M - S) 0.670 0.104 90 6.414 0.000 ***
# --- SAVE PLOT --- #
# Generate the image where to save the plot
pdf(file = here("..",
"3_results",
"figures",
"paper",
"ecosystems_shannon_hetero.pdf"),
width = paper_width_inches,
height = paper_height_inches)
# Save image to the file
plot
# Close the current graphics device to properly save the image
dev.off()
response_variable_selected = "evenness_pielou"
distribution_selected = "gaussian"
# --- PLOT ORIGINAL DATA WITH MEANS --- #
plot = plot.ecosystems.points(ds_ecosystems %>%
filter(trophic_type == trophy_selected),
response_variable_selected,
legend_row_n_input = 3)
plot
# --- PLOT ORIGINAL DATA WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(ds_ecosystems,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## Warning: Removed 50 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning in max(f): no non-missing arguments to max; returning -Inf
## Warning: Computation failed in `stat_summary()`.
## Caused by error in `seq_len()`:
## ! argument must be coercible to non-negative integer
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_summary()`).
## [1] medium autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## Warning: Removed 50 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning in max(f): no non-missing arguments to max; returning -Inf
## Warning: Computation failed in `stat_summary()`.
## Caused by error in `seq_len()`:
## ! argument must be coercible to non-negative integer
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## Warning: Removed 50 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning in max(f): no non-missing arguments to max; returning -Inf
## Warning: Computation failed in `stat_summary()`.
## Caused by error in `seq_len()`:
## ! argument must be coercible to non-negative integer
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- PREPARE DATA FOR ANALYSIS --- #
# Add baselines
baselines = ds_ecosystems %>%
filter(time_point == time_point_of_baselines) %>%
select(ecosystem_ID,
all_of(response_variable_selected)) %>%
rename(baseline = all_of(response_variable_selected))
data_for_analysis = ds_ecosystems %>%
left_join(baselines)
# Filter data
data_for_analysis = data_for_analysis %>%
filter(trophic_type == trophy_selected,
time_point %in% time_points_model,
!is.na(!!sym(response_variable_selected))) %>%
mutate(ecosystem_ID = as.character(ecosystem_ID),
size = NA,
size = case_when(ecosystem_size == "small" ~ "S",
ecosystem_size == "medium" ~ "M",
ecosystem_size == "large" ~ "L",
TRUE ~ size),
size = factor(size, levels = c("S", "M", "L")),
size = as.character(size))
# --- PLOT DATA FOR ANALYSIS WITH MEANS --- #
plot.ecosystems.points(data_for_analysis,
response_variable_selected,
legend_row_n_input = 3)
# --- PLOT DATA FOR ANALYSIS WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(data_for_analysis,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- DEFINE FORMULA --- #
formula = paste("get(response_variable_selected) ~",
"size * connection +",
"(1 | baseline) +",
"(1 | day)") %>%
print()
## [1] "get(response_variable_selected) ~ size * connection + (1 | baseline) + (1 | day)"
formula = as.formula(formula)
# --- CREATE MODEL (WITH NORMAL DISTRIBUTION) --- #
model = lmer(formula,
data = data_for_analysis,
REML = FALSE,
control = lmerControl(optimizer = "Nelder_Mead"))
# --- PLOT RESIDUALS --- #
qqnorm(resid(model))
qqline(resid(model))
create.res.vs.fit.ecosystems(model,
data_for_analysis)
# --- SHOW MODEL SUMMARY --- #
print(summary(model), digits = 3)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: formula
## Data: data_for_analysis
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## AIC BIC logLik deviance df.resid
## -70.5 -48.2 44.3 -88.5 79
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.550 -0.476 0.093 0.661 1.790
##
## Random effects:
## Groups Name Variance Std.Dev.
## baseline (Intercept) 0.0000 0.000
## day (Intercept) 0.0000 0.000
## Residual 0.0214 0.146
## Number of obs: 88, groups: baseline, 30; day, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.6881 0.0378 88.0000 18.21 <2e-16 ***
## sizeM 0.0233 0.0534 88.0000 0.44 0.66
## sizeS -0.0576 0.0534 88.0000 -1.08 0.28
## connectionconnected 0.0671 0.0534 88.0000 1.26 0.21
## sizeM:connectionconnected 0.0140 0.0756 88.0000 0.18 0.85
## sizeS:connectionconnected 0.0393 0.0770 88.0000 0.51 0.61
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) sizeM sizeS cnnctn szM:cn
## sizeM -0.707
## sizeS -0.707 0.500
## cnnctncnnct -0.707 0.500 0.500
## szM:cnnctnc 0.500 -0.707 -0.354 -0.707
## szS:cnnctnc 0.491 -0.347 -0.694 -0.694 0.491
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# --- PERFORM ANOVA (GAUSSIAN) --- #
ANOVA = anova(model) %>%
print()
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## size 0.067268 0.033634 2 88 1.5706 0.213687
## connection 0.157998 0.157998 1 88 7.3782 0.007948 **
## size:connection 0.005699 0.002850 2 88 0.1331 0.875583
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- ESTIMATE MARGINAL MEANS --- #
emmeans_output = emmeans(model,
specs = pairwise ~ size:connection,
adjust = "tukey",
bias.adj = TRUE,
lmer.df = "satterthwaite")
# --- CODE THE CONTRAST LEVELS --- #
emmeans_output
## $emmeans
## size connection emmean SE df lower.CL upper.CL
## L unconnected 0.688 0.0378 88 0.613 0.763
## M unconnected 0.711 0.0378 88 0.636 0.786
## S unconnected 0.630 0.0378 88 0.555 0.706
## L connected 0.755 0.0378 88 0.680 0.830
## M connected 0.792 0.0378 88 0.717 0.867
## S connected 0.737 0.0406 88 0.656 0.818
##
## Degrees-of-freedom method: satterthwaite
## Results are given on the get (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## L unconnected - M unconnected -0.0233 0.0534 88 -0.435 0.9980
## L unconnected - S unconnected 0.0576 0.0534 88 1.078 0.8890
## L unconnected - L connected -0.0671 0.0534 88 -1.256 0.8077
## L unconnected - M connected -0.1043 0.0534 88 -1.953 0.3779
## L unconnected - S connected -0.0488 0.0555 88 -0.881 0.9502
## M unconnected - S unconnected 0.0808 0.0534 88 1.513 0.6569
## M unconnected - L connected -0.0439 0.0534 88 -0.821 0.9630
## M unconnected - M connected -0.0811 0.0534 88 -1.517 0.6542
## M unconnected - S connected -0.0256 0.0555 88 -0.461 0.9973
## S unconnected - L connected -0.1247 0.0534 88 -2.334 0.1919
## S unconnected - M connected -0.1619 0.0534 88 -3.030 0.0366
## S unconnected - S connected -0.1064 0.0555 88 -1.919 0.3977
## L connected - M connected -0.0372 0.0534 88 -0.697 0.9819
## L connected - S connected 0.0183 0.0555 88 0.330 0.9995
## M connected - S connected 0.0555 0.0555 88 1.001 0.9164
##
## Note: contrasts are still on the get scale. Consider using
## regrid() if you want contrasts of back-transformed estimates.
## Degrees-of-freedom method: satterthwaite
## P value adjustment: tukey method for comparing a family of 6 estimates
L_unconnected = c(1, 0, 0, 0, 0, 0)
M_unconnected = c(0, 1, 0, 0, 0, 0)
S_unconnected = c(0, 0, 1, 0, 0, 0)
L_connected = c(0, 0, 0, 1, 0, 0)
M_connected = c(0, 0, 0, 0, 1, 0)
S_connected = c(0, 0, 0, 0, 0, 1)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (T-RATIO, USED FOR RESPONSE VARIABLES WITH NORMAL DISTRIBUTION)--- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
t.ratio = round(t.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (Z RATIO, USED FOR REPONSE VARIABLES WITH TWEEDIE DISRIBUTION) --- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
z.ratio = round(z.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- SHOW RESULTS --- #
ANOVA
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## size 0.067268 0.033634 2 88 1.5706 0.213687
## connection 0.157998 0.157998 1 88 7.3782 0.007948 **
## size:connection 0.005699 0.002850 2 88 0.1331 0.875583
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
contrasts
## contrast estimate SE df t.ratio p.value
## 1 small connection effect 0.106 0.055 88 1.919 0.058
## 2 medium connection effect 0.081 0.053 88 1.517 0.133
## 3 large connection effect 0.067 0.053 88 1.256 0.212
## 4 Size effect in unconnected (L - S) 0.058 0.053 88 1.078 0.284
## 5 Size effect in unconnected (M - S) 0.081 0.053 88 1.513 0.134
## 6 Size effect in connected (L - S) 0.018 0.055 88 0.330 0.742
## 7 Size effect in connected (M - S) 0.056 0.055 88 1.001 0.320
response_variable_selected = "median_body_size_µm2"
distribution_selected = "tweedie"
# --- PLOT ORIGINAL DATA WITH MEANS --- #
plot = plot.ecosystems.points(ds_ecosystems %>%
filter(trophic_type == trophy_selected),
response_variable_selected,
legend_row_n_input = 3)
plot
# --- PLOT ORIGINAL DATA WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(ds_ecosystems,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- PREPARE DATA FOR ANALYSIS --- #
# Add baselines
baselines = ds_ecosystems %>%
filter(time_point == time_point_of_baselines) %>%
select(ecosystem_ID,
all_of(response_variable_selected)) %>%
rename(baseline = all_of(response_variable_selected))
data_for_analysis = ds_ecosystems %>%
left_join(baselines)
# Filter data
data_for_analysis = data_for_analysis %>%
filter(trophic_type == trophy_selected,
time_point %in% time_points_model,
!is.na(!!sym(response_variable_selected))) %>%
mutate(ecosystem_ID = as.character(ecosystem_ID),
size = NA,
size = case_when(ecosystem_size == "small" ~ "S",
ecosystem_size == "medium" ~ "M",
ecosystem_size == "large" ~ "L",
TRUE ~ size),
size = factor(size, levels = c("S", "M", "L")),
size = as.character(size))
# --- PLOT DATA FOR ANALYSIS WITH MEANS --- #
plot.ecosystems.points(data_for_analysis,
response_variable_selected,
legend_row_n_input = 3)
# --- PLOT DATA FOR ANALYSIS WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(data_for_analysis,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- DEFINE FORMULA --- #
formula = paste("get(response_variable_selected) ~",
"size * connection +",
"(1 | baseline) +",
"(1 | day)") %>%
print()
## [1] "get(response_variable_selected) ~ size * connection + (1 | baseline) + (1 | day)"
formula = as.formula(formula)
# --- CREATE MODEL (WITH TWEEDIE DISTRIBUTION) --- #
if(trophy_selected != "heterotrophic" && response_variable_selected != "median_body_size_µm2"){
model <- glmmTMB::glmmTMB(formula,
data = data_for_analysis,
family = glmmTMB::tweedie(link = "log"))
}
if(trophy_selected == "heterotrophic" && response_variable_selected == "median_body_size_µm2"){
model <- glmmTMB(formula,
data = data_for_analysis,
family = glmmTMB::tweedie(link = "log"),
control = glmmTMBControl(optimizer = optim,
optArgs = list(method = "BFGS")))
}
# --- PLOT RESIDUALS --- #
qqnorm(resid(model))
qqline(resid(model))
create.res.vs.fit.ecosystems(model,
data_for_analysis)
# --- SHOW MODEL SUMMARY --- #
print(summary(model), digits = 3)
## Family: tweedie ( log )
## Formula:
## get(response_variable_selected) ~ size * connection + (1 | baseline) +
## (1 | day)
## Data: data_for_analysis
##
## AIC BIC logLik deviance df.resid
## 1536.9 1561.9 -758.4 1516.9 80
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## baseline (Intercept) 9.18e-08 0.000303
## day (Intercept) 1.72e-02 0.131048
## Number of obs: 90, groups: baseline, 30; day, 3
##
## Dispersion parameter for tweedie family (): 0.052
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.45911 0.09588 88.2 <2e-16 ***
## sizeM -0.13249 0.08329 -1.6 0.112
## sizeS -0.01620 0.08332 -0.2 0.846
## connectionconnected -0.00623 0.08331 -0.1 0.940
## sizeM:connectionconnected 0.27709 0.11787 2.4 0.019 *
## sizeS:connectionconnected 0.06794 0.11785 0.6 0.564
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- PERFORM ANOVA --- #
ANOVA = car::Anova(model, type = "III") %>%
print()
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: get(response_variable_selected)
## Chisq Df Pr(>Chisq)
## (Intercept) 7784.2862 1 < 2e-16 ***
## size 3.0125 2 0.22174
## connection 0.0056 1 0.94035
## size:connection 6.0053 2 0.04966 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- ESTIMATE MARGINAL MEANS --- #
emmeans_output = emmeans(model,
specs = pairwise ~ size:connection,
adjust = "tukey",
bias.adj = TRUE,
lmer.df = "satterthwaite")
# --- CODE THE CONTRAST LEVELS --- #
emmeans_output
## $emmeans
## size connection emmean SE df asymp.LCL asymp.UCL
## L unconnected 8.46 0.0959 Inf 8.27 8.65
## M unconnected 8.33 0.0959 Inf 8.14 8.51
## S unconnected 8.44 0.0959 Inf 8.25 8.63
## L connected 8.45 0.0959 Inf 8.26 8.64
## M connected 8.60 0.0959 Inf 8.41 8.79
## S connected 8.50 0.0959 Inf 8.32 8.69
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## L unconnected - M unconnected 0.13249 0.0833 Inf 1.591 0.6048
## L unconnected - S unconnected 0.01620 0.0833 Inf 0.194 1.0000
## L unconnected - L connected 0.00623 0.0833 Inf 0.075 1.0000
## L unconnected - M connected -0.13837 0.0833 Inf -1.661 0.5578
## L unconnected - S connected -0.04551 0.0834 Inf -0.546 0.9942
## M unconnected - S unconnected -0.11629 0.0833 Inf -1.396 0.7294
## M unconnected - L connected -0.12625 0.0833 Inf -1.515 0.6545
## M unconnected - M connected -0.27086 0.0834 Inf -3.248 0.0148
## M unconnected - S connected -0.17799 0.0833 Inf -2.136 0.2688
## S unconnected - L connected -0.00997 0.0834 Inf -0.119 1.0000
## S unconnected - M connected -0.15457 0.0835 Inf -1.852 0.4321
## S unconnected - S connected -0.06171 0.0833 Inf -0.741 0.9768
## L connected - M connected -0.14461 0.0833 Inf -1.735 0.5083
## L connected - S connected -0.05174 0.0835 Inf -0.620 0.9896
## M connected - S connected 0.09286 0.0835 Inf 1.112 0.8765
##
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 6 estimates
L_unconnected = c(1, 0, 0, 0, 0, 0)
M_unconnected = c(0, 1, 0, 0, 0, 0)
S_unconnected = c(0, 0, 1, 0, 0, 0)
L_connected = c(0, 0, 0, 1, 0, 0)
M_connected = c(0, 0, 0, 0, 1, 0)
S_connected = c(0, 0, 0, 0, 0, 1)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (T-RATIO, USED FOR RESPONSE VARIABLES WITH NORMAL DISTRIBUTION)--- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
t.ratio = round(t.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (Z RATIO, USED FOR REPONSE VARIABLES WITH TWEEDIE DISRIBUTION) --- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
z.ratio = round(z.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- SHOW RESULTS --- #
ANOVA
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: get(response_variable_selected)
## Chisq Df Pr(>Chisq)
## (Intercept) 7784.2862 1 < 2e-16 ***
## size 3.0125 2 0.22174
## connection 0.0056 1 0.94035
## size:connection 6.0053 2 0.04966 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
contrasts
## contrast estimate SE df z.ratio p.value
## 1 small connection effect 0.062 0.083 Inf 0.741 0.459
## 2 medium connection effect 0.271 0.083 Inf 3.248 0.001 **
## 3 large connection effect -0.006 0.083 Inf -0.075 0.940
## 4 Size effect in unconnected (L - S) 0.016 0.083 Inf 0.194 0.846
## 5 Size effect in unconnected (M - S) -0.116 0.083 Inf -1.396 0.163
## 6 Size effect in connected (L - S) -0.052 0.084 Inf -0.620 0.536
## 7 Size effect in connected (M - S) 0.093 0.083 Inf 1.112 0.266
# --- SAVE PLOT --- #
# Generate the image where to save the plot
pdf(file = here("..",
"3_results",
"figures",
"paper",
"ecosystems_median_body_size_hetero.pdf"),
width = paper_width_inches,
height = paper_height_inches)
# Save image to the file
plot
# Close the current graphics device to properly save the image
dev.off()
response_variable_selected = "indiv_per_ml"
distribution_selected = "tweedie"
# --- PLOT ORIGINAL DATA WITH MEANS --- #
plot = plot.ecosystems.points(ds_ecosystems %>%
filter(trophic_type == trophy_selected),
response_variable_selected,
legend_row_n_input = 3)
plot
# --- PLOT ORIGINAL DATA WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(ds_ecosystems,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large autotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- PREPARE DATA FOR ANALYSIS --- #
# Add baselines
baselines = ds_ecosystems %>%
filter(time_point == time_point_of_baselines) %>%
select(ecosystem_ID,
all_of(response_variable_selected)) %>%
rename(baseline = all_of(response_variable_selected))
data_for_analysis = ds_ecosystems %>%
left_join(baselines)
# Filter data
data_for_analysis = data_for_analysis %>%
filter(trophic_type == trophy_selected,
time_point %in% time_points_model,
!is.na(!!sym(response_variable_selected))) %>%
mutate(ecosystem_ID = as.character(ecosystem_ID),
size = NA,
size = case_when(ecosystem_size == "small" ~ "S",
ecosystem_size == "medium" ~ "M",
ecosystem_size == "large" ~ "L",
TRUE ~ size),
size = factor(size, levels = c("S", "M", "L")),
size = as.character(size))
# --- PLOT DATA FOR ANALYSIS WITH MEANS --- #
plot.ecosystems.points(data_for_analysis,
response_variable_selected,
legend_row_n_input = 3)
# --- PLOT DATA FOR ANALYSIS WITH SINGLE REPLICATES --- #
plot.ecosystems.replicates.one.by.one(data_for_analysis,
ecosystem_size_and_trophy_selected,
response_variable_selected)
## [1] small heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] medium heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
## [1] large heterotrophic
## 6 Levels: small autotrophic medium autotrophic ... large heterotrophic
# --- DEFINE FORMULA --- #
formula = paste("get(response_variable_selected) ~",
"size * connection +",
"(1 | baseline) +",
"(1 | day)") %>%
print()
## [1] "get(response_variable_selected) ~ size * connection + (1 | baseline) + (1 | day)"
formula = as.formula(formula)
# --- CREATE MODEL (WITH TWEEDIE DISTRIBUTION) --- #
if(trophy_selected != "heterotrophic" && response_variable_selected != "median_body_size_µm2"){
model <- glmmTMB::glmmTMB(formula,
data = data_for_analysis,
family = glmmTMB::tweedie(link = "log"))
}
if(trophy_selected == "heterotrophic" && response_variable_selected == "median_body_size_µm2"){
model <- glmmTMB(formula,
data = data_for_analysis,
family = glmmTMB::tweedie(link = "log"),
control = glmmTMBControl(optimizer = optim,
optArgs = list(method = "BFGS")))
}
# --- PLOT RESIDUALS --- #
qqnorm(resid(model))
qqline(resid(model))
create.res.vs.fit.ecosystems(model,
data_for_analysis)
# --- SHOW MODEL SUMMARY --- #
print(summary(model), digits = 3)
## Family: tweedie ( log )
## Formula:
## get(response_variable_selected) ~ size * connection + (1 | baseline) +
## (1 | day)
## Data: data_for_analysis
##
## AIC BIC logLik deviance df.resid
## 1536.9 1561.9 -758.4 1516.9 80
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## baseline (Intercept) 9.18e-08 0.000303
## day (Intercept) 1.72e-02 0.131048
## Number of obs: 90, groups: baseline, 30; day, 3
##
## Dispersion parameter for tweedie family (): 0.052
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.45911 0.09588 88.2 <2e-16 ***
## sizeM -0.13249 0.08329 -1.6 0.112
## sizeS -0.01620 0.08332 -0.2 0.846
## connectionconnected -0.00623 0.08331 -0.1 0.940
## sizeM:connectionconnected 0.27709 0.11787 2.4 0.019 *
## sizeS:connectionconnected 0.06794 0.11785 0.6 0.564
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- PERFORM ANOVA --- #
ANOVA = car::Anova(model, type = "III") %>%
print()
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: get(response_variable_selected)
## Chisq Df Pr(>Chisq)
## (Intercept) 7784.2862 1 < 2e-16 ***
## size 3.0125 2 0.22174
## connection 0.0056 1 0.94035
## size:connection 6.0053 2 0.04966 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- ESTIMATE MARGINAL MEANS --- #
emmeans_output = emmeans(model,
specs = pairwise ~ size:connection,
adjust = "tukey",
bias.adj = TRUE,
lmer.df = "satterthwaite")
# --- CODE THE CONTRAST LEVELS --- #
emmeans_output
## $emmeans
## size connection emmean SE df asymp.LCL asymp.UCL
## L unconnected 8.46 0.0959 Inf 8.27 8.65
## M unconnected 8.33 0.0959 Inf 8.14 8.51
## S unconnected 8.44 0.0959 Inf 8.25 8.63
## L connected 8.45 0.0959 Inf 8.26 8.64
## M connected 8.60 0.0959 Inf 8.41 8.79
## S connected 8.50 0.0959 Inf 8.32 8.69
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## L unconnected - M unconnected 0.13249 0.0833 Inf 1.591 0.6048
## L unconnected - S unconnected 0.01620 0.0833 Inf 0.194 1.0000
## L unconnected - L connected 0.00623 0.0833 Inf 0.075 1.0000
## L unconnected - M connected -0.13837 0.0833 Inf -1.661 0.5578
## L unconnected - S connected -0.04551 0.0834 Inf -0.546 0.9942
## M unconnected - S unconnected -0.11629 0.0833 Inf -1.396 0.7294
## M unconnected - L connected -0.12625 0.0833 Inf -1.515 0.6545
## M unconnected - M connected -0.27086 0.0834 Inf -3.248 0.0148
## M unconnected - S connected -0.17799 0.0833 Inf -2.136 0.2688
## S unconnected - L connected -0.00997 0.0834 Inf -0.119 1.0000
## S unconnected - M connected -0.15457 0.0835 Inf -1.852 0.4321
## S unconnected - S connected -0.06171 0.0833 Inf -0.741 0.9768
## L connected - M connected -0.14461 0.0833 Inf -1.735 0.5083
## L connected - S connected -0.05174 0.0835 Inf -0.620 0.9896
## M connected - S connected 0.09286 0.0835 Inf 1.112 0.8765
##
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 6 estimates
L_unconnected = c(1, 0, 0, 0, 0, 0)
M_unconnected = c(0, 1, 0, 0, 0, 0)
S_unconnected = c(0, 0, 1, 0, 0, 0)
L_connected = c(0, 0, 0, 1, 0, 0)
M_connected = c(0, 0, 0, 0, 1, 0)
S_connected = c(0, 0, 0, 0, 0, 1)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (T-RATIO, USED FOR RESPONSE VARIABLES WITH NORMAL DISTRIBUTION)--- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
t.ratio = round(t.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- CALCULATE CONTRASTS YOU ARE INTERESTED IN (Z RATIO, USED FOR REPONSE VARIABLES WITH TWEEDIE DISRIBUTION) --- #
n_of_digits = 3
contrasts = contrast(emmeans_output,
method = list("small connection effect" = S_connected - S_unconnected,
"medium connection effect" = M_connected - M_unconnected,
"large connection effect" = L_connected - L_unconnected,
"Size effect in unconnected (L - S)" = L_unconnected - S_unconnected,
"Size effect in unconnected (M - S)" = M_unconnected - S_unconnected,
"Size effect in connected (L - S)" = L_connected - S_connected,
"Size effect in connected (M - S)" = M_connected - S_connected)) %>%
as.data.frame() %>%
mutate(p.value = round(p.value, digits = n_of_digits),
estimate = round(estimate, digits = n_of_digits),
SE = round(SE, digits = n_of_digits),
df = round(df, digits = n_of_digits),
z.ratio = round(z.ratio, digits = n_of_digits),
e = "",
e = ifelse(p.value > 0.1,
"",
e),
e = ifelse(p.value < 0.05,
"*",
e),
e = ifelse(p.value < 0.01,
"**",
e),
e = ifelse(p.value < 0.001,
"***",
e)) %>%
rename(" " = e)
# --- SHOW RESULTS --- #
ANOVA
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: get(response_variable_selected)
## Chisq Df Pr(>Chisq)
## (Intercept) 7784.2862 1 < 2e-16 ***
## size 3.0125 2 0.22174
## connection 0.0056 1 0.94035
## size:connection 6.0053 2 0.04966 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
contrasts
## contrast estimate SE df z.ratio p.value
## 1 small connection effect 0.062 0.083 Inf 0.741 0.459
## 2 medium connection effect 0.271 0.083 Inf 3.248 0.001 **
## 3 large connection effect -0.006 0.083 Inf -0.075 0.940
## 4 Size effect in unconnected (L - S) 0.016 0.083 Inf 0.194 0.846
## 5 Size effect in unconnected (M - S) -0.116 0.083 Inf -1.396 0.163
## 6 Size effect in connected (L - S) -0.052 0.084 Inf -0.620 0.536
## 7 Size effect in connected (M - S) 0.093 0.083 Inf 1.112 0.266
print_dominance("small heterotrophic unconnected")
print_dominance("small heterotrophic connected")
for(time_point_i in 2:4){
print(paste("Time point ", time_point_i))
p = ds_species_effect_sizes_d %>%
filter(ecosystem_type == "small heterotrophic connected",
time_point == time_point_i,
species != "Eug") %>%
ggplot(aes(x = species,
y = effect_size)) +
geom_pointrange(aes(ymin = lower_95_CI, ymax = upper_95_CI)) +
geom_hline(yintercept = 0, linetype = "dotted") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = legend_position,
legend.key.width = unit(legend_width_cm, "cm"))
print(p)
}
## [1] "Time point 2"
## [1] "Time point 3"
## [1] "Time point 4"
print_dominance("medium heterotrophic unconnected")
print_dominance("medium heterotrophic connected")
for(time_point_i in 2:4){
print(paste("Time point ", time_point_i))
p = ds_species_effect_sizes_d %>%
filter(ecosystem_type == "medium heterotrophic connected",
time_point == time_point_i,
species != "Eug") %>%
ggplot(aes(x = species,
y = effect_size)) +
geom_pointrange(aes(ymin = lower_95_CI, ymax = upper_95_CI)) +
geom_hline(yintercept = 0, linetype = "dotted") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = legend_position,
legend.key.width = unit(legend_width_cm, "cm"))
print(p)
}
## [1] "Time point 2"
## [1] "Time point 3"
## [1] "Time point 4"
print_dominance("large heterotrophic unconnected")
print_dominance("large heterotrophic connected")
for(time_point_i in 2:4){
print(paste("Time point ", time_point_i))
p = ds_species_effect_sizes_d %>%
filter(ecosystem_type == "large heterotrophic connected",
time_point == time_point_i,
species != "Eug") %>%
ggplot(aes(x = species,
y = effect_size)) +
geom_pointrange(aes(ymin = lower_95_CI, ymax = upper_95_CI)) +
geom_hline(yintercept = 0, linetype = "dotted") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = legend_position,
legend.key.width = unit(legend_width_cm, "cm"))
print(p)
}
## [1] "Time point 2"
## [1] "Time point 3"
## [1] "Time point 4"
The connection affected community composition in small, medium, and large heterotrophic patches. In small heterotrophic patches, the connection increased the dominance of Pau (t3) and decreased the one of Col (t3-4) and Pca (t4). In the medium heterotrophic patches, the connection increased the dominance of Ble (t2-3) and Pau (t4). In the large heterotrophic patches, the connection increased the dominance of Ble (t2) and Col (t2).
print_dominance("Small heterotrophic unconnected")
print_dominance("Small heterotrophic connected")
for(time_point_i in 2:4){
print(paste("Time point ", time_point_i))
p = ds_species_effect_sizes_LRR %>%
filter(ecosystem_type == "Small heterotrophic connected",
time_point == time_point_i,
species != "Eug") %>%
ggplot(aes(x = species,
y = effect_size)) +
geom_pointrange(aes(ymin = lower_95_CI, ymax = upper_95_CI)) +
geom_hline(yintercept = 0, linetype = "dotted") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = legend_position,
legend.key.width = unit(legend_width_cm, "cm"))
print(p)
}
## [1] "Time point 2"
## [1] "Time point 3"
## [1] "Time point 4"
print_dominance("Medium heterotrophic unconnected")
print_dominance("Medium heterotrophic connected")
for(time_point_i in 2:4){
print(paste("Time point ", time_point_i))
p = ds_species_effect_sizes_LRR %>%
filter(ecosystem_type == "Medium heterotrophic connected",
time_point == time_point_i,
species != "Eug") %>%
ggplot(aes(x = species,
y = effect_size)) +
geom_pointrange(aes(ymin = lower_95_CI, ymax = upper_95_CI)) +
geom_hline(yintercept = 0, linetype = "dotted") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = legend_position,
legend.key.width = unit(legend_width_cm, "cm"))
print(p)
}
## [1] "Time point 2"
## [1] "Time point 3"
## [1] "Time point 4"
print_dominance("Large heterotrophic unconnected")
print_dominance("Large heterotrophic connected")
for(time_point_i in 2:4){
print(paste("Time point ", time_point_i))
p = ds_species_effect_sizes_LRR %>%
filter(ecosystem_type == "Large heterotrophic connected",
time_point == time_point_i,
species != "Eug") %>%
ggplot(aes(x = species,
y = effect_size)) +
geom_pointrange(aes(ymin = lower_95_CI, ymax = upper_95_CI)) +
geom_hline(yintercept = 0, linetype = "dotted") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = legend_position,
legend.key.width = unit(legend_width_cm, "cm"))
print(p)
}
## [1] "Time point 2"
## [1] "Time point 3"
## [1] "Time point 4"
The connection in small ecosystems increases Ble (t3) and Pau and Cep (t4). In medium ecosystems it increases Ble (t3), Spi (t4), and decreases Cep and Col (t5). In large ecosystems it increases Col and Ble (t2), and increases Spi (t5).
We want to know whether there was a correlation between biodiversity (richness/shannon/eveness) and productivity. We include all the time points in all the heterotrophic ecosystems (small/medium/large, connected/unconnected).
ds_ecosystems %>%
filter(trophic_type == "heterotrophic") %>%
ggplot(aes(x = species_richness,
y = bioarea_mm2_per_ml)) +
geom_point() +
xlim(0, length(protist_species)) +
labs(x = axis_names$axis_name[axis_names$variable == "species_richness"],
y = axis_names$axis_name[axis_names$variable == "bioarea_mm2_per_ml"]) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = legend_position,
legend.key.width = unit(legend_width_cm, "cm"))
ds_ecosystems %>%
filter(trophic_type == "heterotrophic") %>%
ggplot(aes(x = shannon,
y = bioarea_mm2_per_ml)) +
geom_point() +
labs(x = axis_names$axis_name[axis_names$variable == "shannon"],
y = axis_names$axis_name[axis_names$variable == "bioarea_mm2_per_ml"]) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = legend_position,
legend.key.width = unit(legend_width_cm, "cm"))
ds_ecosystems %>%
filter(trophic_type == "heterotrophic",
is.na(evenness_pielou) != TRUE) %>%
ggplot(aes(x = evenness_pielou,
y = bioarea_mm2_per_ml)) +
geom_point() +
labs(x = axis_names$axis_name[axis_names$variable == "evenness_pielou"],
y = axis_names$axis_name[axis_names$variable == "bioarea_mm2_per_ml"]) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = legend_position,
legend.key.width = unit(legend_width_cm, "cm"))
# --- SET RESULT PARAMETERS --- #
colour_autotrophs = "#2ca25f"
colour_heterotrophs = "#3182bd"
treatment_colours = c("autotrophic" = colour_autotrophs,
"heterotrophic" = colour_heterotrophs,
"Small heterotrophic connected"= colour_heterotrophs,
"Medium heterotrophic connected"= colour_heterotrophs,
"Large heterotrophic connected" = colour_heterotrophs,
"Small autotrophic connected" = colour_autotrophs,
"Medium autotrophic connected" = colour_autotrophs,
"Large autotrophic connected" = colour_autotrophs,
"Small heterotrophic unconnected"= colour_heterotrophs,
"Medium heterotrophic unconnected"= colour_heterotrophs,
"Large heterotrophic unconnected" = colour_heterotrophs,
"Small autotrophic unconnected" = colour_autotrophs,
"Medium autotrophic unconnected" = colour_autotrophs,
"Large autotrophic unconnected" = colour_autotrophs,
"Small heterotrophic"= colour_heterotrophs,
"Medium heterotrophic"= colour_heterotrophs,
"Large heterotrophic" = colour_heterotrophs,
"Small autotrophic" = colour_autotrophs,
"Medium autotrophic" = colour_autotrophs,
"Large autotrophic" = colour_autotrophs,
"autotrophic-dominated" = "#5ab4ac",
"equally-dominated" = "#969696",
"heterotrophic-dominated" = "#d8b365",
"autotrophic-dominated connected" = "#5ab4ac",
"equally-dominated connected" = "#969696",
"heterotrophic-dominated connected" = "#d8b365",
"autotrophic-dominated unconnected" = "#5ab4ac",
"equally-dominated unconnected" = "#969696",
"heterotrophic-dominated unconnected" = "#d8b365")
Figure 2. The effect of the autotrophic-heterotrophic spatial feedback on meta-ecosystem biomass was tuned by patch size. The effects of the spatial feedback on total meta-ecosystem biomass were positive in Autotrophic Dominated meta-ecosystems (they had higher total bioarea than Autotrophic Dominated isolated), negative in Heterotrophic Dominated meta-ecosystems (they had lower total bioarea than Heterotrophic Dominated isolated), and non-significant in Equally Dominated meta-ecosystems (they had the same total bioarea than Equally Dominated isolated). Heterotrophic-dominated meta-ecosystems: meta-ecosystems with one big heterotrophic and one small autotrophic patch. Autotrophic-dominated meta-ecosystems: meta-ecosystems with one big autotrophic and one small heterotrophic patch. Heterotrophic-dominated isolated: systems composed of one big heterotrophic and one small autotrophic isolated patch. Autotrophic-dominated isolated: systems with one big autotrophic and one small heterotrophic isolated patch.
# --- CREATE THE META-ECOSYSTEM PLOT FOR THE PAPER --- #
# Generate the image where to save the plot
pdf(file = here("..",
"3_results",
"figures",
"paper",
"metaecosystems_1.pdf"),
width = paper_width_inches,
height = paper_height_inches)
# Create plot
p = plot.metaecos.points(data = ds_metaecosystems,
response_variable = "total_metaecosystem_bioarea_mm2")
# Modify the plot to be saved using ggarrange
ggpubr::ggarrange(p +
theme(plot.margin = unit(c(ggarrange_margin_left,
ggarrange_margin_right,
ggarrange_margin_bottom,
ggarrange_margin_left),
"cm")) +
scale_x_continuous(breaks = unique(ds_ecosystems$day)),
align = "v",
label.x = 0.1,
label.y = 0.8)
# Close the current graphics device to properly save the image
dev.off()
# --- SHOW THE META-ECOSYSTEM PLOT FOR THE PAPER --- #
p
# --- CONSTRUCT ECOSYSTEM BIOMASS PLOTS --- #
# Set parameters
response_variable = "bioarea_mm2_per_ml"
ecosystem_size_and_trophy_p_1 = c("large autotrophic",
"small heterotrophic")
ecosystem_size_and_trophy_p_2 = c("medium autotrophic",
"medium heterotrophic")
ecosystem_size_and_trophy_p_3 = c("large heterotrophic",
"small autotrophic")
legend_row_n_input = 2
y_min = 0
y_max = 21
# Prepare data for plotting
data_plot_1 = ds_ecosystems %>%
filter(ecosystem_size_and_trophy %in% ecosystem_size_and_trophy_p_1)
data_plot_2 = ds_ecosystems %>%
filter(ecosystem_size_and_trophy %in% ecosystem_size_and_trophy_p_2)
data_plot_3 = ds_ecosystems %>%
filter(ecosystem_size_and_trophy %in% ecosystem_size_and_trophy_p_3)
# Construct plots
p_1 = plot.ecosystems.points.paper(data_plot_1,
response_variable,
legend_row_n_input) +
theme(plot.margin = unit(c(ggarrange_margin_left,
ggarrange_margin_right,
ggarrange_margin_bottom,
ggarrange_margin_left),
"cm")) +
ylim(y_min, y_max)
p_2 = plot.ecosystems.points.paper(data_plot_2,
response_variable,
legend_row_n_input) +
theme(plot.margin = unit(c(ggarrange_margin_left,
ggarrange_margin_right,
ggarrange_margin_bottom,
ggarrange_margin_left),
"cm")) +
ylim(y_min, y_max)
p_3 = plot.ecosystems.points.paper(data_plot_3,
response_variable,
legend_row_n_input) +
theme(plot.margin = unit(c(ggarrange_margin_left,
ggarrange_margin_right,
ggarrange_margin_bottom,
ggarrange_margin_left),
"cm")) +
ylim(y_min, y_max)
# Combine plots
p_combined = ggarrange(p_1 +
rremove("xlab") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank()),
p_2 +
rremove("xlab") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "none"),
p_3 +
scale_x_continuous(breaks = unique(data_plot_2$day)) +
theme(legend.position = "none"),
heights = c(0.95, 0.70, 0.8),
nrow = 3,
align = "v",
labels = c("autotrophic-dominated",
"equally-dominated",
"heterotrophic-dominated"),
label.x = c(-0.080, -0.05, -0.095),
label.y = c(0.70, 0.977, 0.979))
# --- SHOW ECOSYSTEM BIOMASS PLOTS --- #
p_combined
# --- SAVE ECOSYSTEM BIOMASS PLOTS --- #
# Generate the image where to save the plot
pdf(file = here("..",
"3_results",
"figures",
"paper",
"ecosystems_biomass.pdf"),
width = paper_width_inches,
height = paper_height_inches)
# Save image to the file
p_combined
# Close the current graphics device to properly save the image
dev.off()
Create plots for presentations on how meta-ecosystem biomass changed across connected and unconnected meta-ecosystem types. Start from plotting an empty figures and then show one line at the time.
# --- SET PARAMETERS FOR PLOTTING --- #
response_variable = "total_metaecosystem_bioarea_mm2"
metaeco_type_n_connection_selected = c("equally-dominated unconnected",
"equally-dominated connected",
"autotrophic-dominated unconnected",
"autotrophic-dominated connected",
"heterotrophic-dominated unconnected",
"heterotrophic-dominated connected")
x_min = 0
x_max = sampling_days[length(sampling_days)]
y_min = 50
y_max = 750
plot_label_x = 0.1
plot_label_y = 0.8
# --- CREATE AN EMPTY PLOT --- #
# Set parameters
connections_selected = ""
types_selected = ""
# Generate the image where to save the plot
pdf(file = here("..",
"3_results",
"figures",
"presentations",
"metaecosystems_0.pdf"),
width = presentation_figure_width_inches,
height = presentation_figure_height_inches)
# Prepare data for plotting
data_for_plotting = ds_metaecosystems %>%
filter(!is.na(!!sym(response_variable)))
# Create plot
p = data_for_plotting %>%
ggplot(aes(x = day,
y = get(response_variable))) +
geom_point(stat = "summary",
fun = "mean",
position = position_dodge(dodging),
size = presentation_treatment_points_size) +
scale_x_continuous(breaks = sampling_days,
limits = c(x_min, x_max)) +
ylim(y_min, y_max) +
labs(x = axis_names$axis_name[axis_names$variable == "day"],
y = axis_names$axis_name[axis_names$variable == response_variable]) +
font("xlab", size = presentation_labels_size) +
font("ylab", size = presentation_labels_size) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = legend_position,
legend.key.width = unit(legend_width_cm, "cm"),
axis.title.x = element_text(size = presentation_labels_size),
axis.title.y = element_text(size = presentation_labels_size),
axis.text.x = element_text(size = presentation_labels_size),
axis.text.y = element_text(size = presentation_labels_size),
plot.margin = unit(c(ggarrange_margin_left,
ggarrange_margin_right,
ggarrange_margin_bottom,
ggarrange_margin_left),
"cm")) +
geom_rect(xmin = grey_background_xmin,
xmax = grey_background_xmax,
ymin = grey_background_ymin,
ymax = grey_background_ymax,
fill = grey_background_fill,
alpha = grey_background_alpha,
color = grey_background_color) +
geom_vline(xintercept = resource_flow_days,
linetype = resource_flow_line_type,
color = resource_flow_line_colour,
linewidth = resource_flow_line_width)
# Save the plot in the same way you will save the following plots using ggarrange
ggpubr::ggarrange(p,
align = "v",
label.x = plot_label_x,
label.y = plot_label_y) %>%
print()
# Close the current graphics device to properly save the image
dev.off()
# --- CREATE PLOTS LINE BY LINE --- #
# Plot filled plots, adding to every plot a new line.
for(i in 1:length(metaeco_type_n_connection_selected)) {
# Generate the image where to save the plot
pdf(file = here("..",
"3_results",
"figures",
"presentations",
paste0("metaecosystems_",
i,
".pdf")),
width = presentation_figure_width_inches,
height = presentation_figure_height_inches)
# Prepare data for plotting
data_for_plotting = ds_metaecosystems %>%
filter(metaeco_type_n_connection %in% metaeco_type_n_connection_selected[1:i],
!is.na(!!sym(response_variable))) %>%
summarySE(measurevar = response_variable,
groupvars = c("day", "metaecosystem_type", "connection"))
# Create plot
p = data_for_plotting %>%
ggplot(aes(x = day,
y = get(response_variable),
group = interaction(day, metaecosystem_type, connection),
color = metaecosystem_type,
linetype = connection)) +
# Points
geom_point(stat = "summary",
fun = "mean",
position = position_dodge(dodging),
size = presentation_treatment_points_size) +
geom_errorbar(aes(ymax = get(response_variable) + ci,
ymin = get(response_variable) - ci),
width = width_errorbar,
position = position_dodge(dodging)) +
# Lines
geom_line(stat = "summary",
fun = "mean",
aes(group = interaction(metaecosystem_type, connection)),
position = position_dodge(dodging),
linewidth = presentation_treatment_linewidth) +
scale_linetype_manual(values = treatment_linetypes) +
# Axes & legend
labs(x = axis_names$axis_name[axis_names$variable == "day"],
y = axis_names$axis_name[axis_names$variable == response_variable],
color = "") +
guides(color = "none",
linetype = "none") +
xlim(x_min, x_max) +
ylim(y_min, y_max) +
scale_x_continuous(breaks = unique(ds_metaecosystems$day)) +
scale_color_manual(values = treatment_colours) +
guides(color = guide_legend(order = 1,
title = NULL),
linetype = guide_legend(order = 2,
title = NULL)) +
# Extra graphic elements
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "none",
legend.key.width = unit(legend_width_cm, "cm"),
axis.title.x = element_text(size = presentation_labels_size),
axis.title.y = element_text(size = presentation_labels_size),
axis.text.x = element_text(size = presentation_labels_size),
axis.text.y = element_text(size = presentation_labels_size)) +
geom_rect(xmin = grey_background_xmin,
xmax = grey_background_xmax,
ymin = grey_background_ymin,
ymax = grey_background_ymax,
fill = grey_background_fill,
alpha = grey_background_alpha,
color = grey_background_color) +
geom_vline(xintercept = resource_flow_days,
linetype = resource_flow_line_type,
color = resource_flow_line_colour,
linewidth = resource_flow_line_width)
# Save the plot in the same way you will save the following plots using ggarrange
ggpubr::ggarrange(p,
align = "v",
label.x = plot_label_x,
label.y = plot_label_y) %>%
print()
# Close the current graphics device to properly save the image
dev.off()
}
metaecosystem_type_selected = "autotrophic-dominated"
# --- SET PARAMETERS FOR PLOTTING --- #
response_variable = "bioarea_mm2_per_ml"
x_min = 0
x_max = sampling_days[length(sampling_days)]
y_min = -1
y_max = 22
# --- PREPARE DATA FOR PLOTTING --- #
data_for_plotting = ds_ecosystems %>%
filter(metaecosystem_type == metaecosystem_type_selected)
# --- GET ECOSYSTEM TYPES TO PLOT --- #
ecosystem_type_selected = data_for_plotting %>%
pull(ecosystem_type) %>%
unique()
# --- CREATE AN EMPTY PLOT --- #
# Generate the image where to save the plot
pdf(file = here("..",
"3_results",
"figures",
"presentations",
paste0(metaecosystem_type_selected,
"_ecosystems_0.pdf")),
width = presentation_figure_width_inches,
height = presentation_figure_height_inches)
# Prepare data for plotting
data_for_plotting_2 = data_for_plotting %>%
filter(ecosystem_type == "",
!is.na(!!sym(response_variable)))
# Create plot
p = data_for_plotting_2 %>%
ggplot(aes(x = day,
y = get(response_variable))) +
scale_x_continuous(breaks = sampling_days,
limits = c(x_min, x_max)) +
ylim(y_min, y_max) +
labs(x = axis_names$axis_name[axis_names$variable == "day"],
y = axis_names$axis_name[axis_names$variable == response_variable]) +
geom_vline(xintercept = resource_flow_days,
linetype = resource_flow_line_type,
color = resource_flow_line_colour,
linewidth = resource_flow_line_width) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = legend_position,
legend.key.width = unit(legend_width_cm, "cm"),
axis.title.x = element_text(size = presentation_labels_size),
axis.title.y = element_text(size = presentation_labels_size),
axis.text.x = element_text(size = presentation_labels_size),
axis.text.y = element_text(size = presentation_labels_size),
plot.margin = unit(c(ggarrange_margin_left,
ggarrange_margin_right,
ggarrange_margin_bottom,
ggarrange_margin_left),
"cm")) +
font("xlab", size = presentation_labels_size) +
font("ylab", size = presentation_labels_size)
# Show plot
p
# Give the plot a ggpubr format
ggpubr::ggarrange(p,
align = "v",
label.x = 0.1,
label.y = 0.8) %>%
print()
# Close the current graphics device to properly save the image
dev.off()
# --- CREATE PLOTS LINE BY LINE --- #
# Plot filled plots, adding to every plot a new line.
for(i in 1:length(ecosystem_type_selected)) {
# Prepare data for plotting
data_for_plotting_2 = data_for_plotting %>%
filter(ecosystem_type %in% ecosystem_type_selected[1:i],
!is.na(!!sym(response_variable))) %>%
summarySE(measurevar = response_variable,
groupvars = c("day", "ecosystem_type", "connection"))
# Create plot
p = data_for_plotting_2 %>%
ggplot(aes(x = day,
y = get(response_variable),
group = interaction(day, ecosystem_type, connection),
color = ecosystem_type,
linetype = connection)) +
# Points
geom_point(stat = "summary",
fun = "mean",
position = position_dodge(dodging),
size = presentation_treatment_points_size) +
geom_errorbar(aes(ymax = get(response_variable) + ci,
ymin = get(response_variable) - ci),
width = width_errorbar,
position = position_dodge(dodging)) +
# Lines
geom_line(stat = "summary",
fun = "mean",
aes(group = interaction(ecosystem_type, connection)),
position = position_dodge(dodging),
linewidth = presentation_treatment_linewidth) +
scale_linetype_manual(values = treatment_linetypes) +
# Axes & legend
labs(x = axis_names$axis_name[axis_names$variable == "day"],
y = axis_names$axis_name[axis_names$variable == response_variable],
color = "") +
guides(color = "none",
linetype = "none") +
xlim(x_min, x_max) +
ylim(y_min, y_max) +
scale_x_continuous(breaks = unique(data_for_plotting_2$day)) +
scale_color_manual(values = treatment_colours) +
guides(color = guide_legend(order = 1,
title = NULL),
linetype = guide_legend(order = 2,
title = NULL)) +
# Extra graphic elements
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "none",
legend.key.width = unit(legend_width_cm, "cm"),
axis.title.x = element_text(size = presentation_labels_size),
axis.title.y = element_text(size = presentation_labels_size),
axis.text.x = element_text(size = presentation_labels_size),
axis.text.y = element_text(size = presentation_labels_size)) +
geom_rect(xmin = grey_background_xmin,
xmax = grey_background_xmax,
ymin = grey_background_ymin,
ymax = grey_background_ymax,
fill = grey_background_fill,
alpha = grey_background_alpha,
color = grey_background_color) +
geom_vline(xintercept = resource_flow_days,
linetype = resource_flow_line_type,
color = resource_flow_line_colour,
linewidth = resource_flow_line_width)
# Give the plot a ggpubr format
p = ggpubr::ggarrange(p,
align = "v",
label.x = 0.1,
label.y = 0.8) %>%
print()
# Generate the image where to save the plot
pdf(file = here("..",
"3_results",
"figures",
"presentations",
paste0(metaecosystem_type_selected,
"_ecosystems_",
i,
".pdf")),
width = presentation_figure_width_inches,
height = presentation_figure_height_inches)
# Save image to the file
print(p)
# Close the current graphics device to properly save the image
dev.off()
}
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
metaecosystem_type_selected = "heterotrophic-dominated"
# --- SET PARAMETERS FOR PLOTTING --- #
response_variable = "bioarea_mm2_per_ml"
x_min = 0
x_max = sampling_days[length(sampling_days)]
y_min = -1
y_max = 22
# --- PREPARE DATA FOR PLOTTING --- #
data_for_plotting = ds_ecosystems %>%
filter(metaecosystem_type == metaecosystem_type_selected)
# --- GET ECOSYSTEM TYPES TO PLOT --- #
ecosystem_type_selected = data_for_plotting %>%
pull(ecosystem_type) %>%
unique()
# --- CREATE AN EMPTY PLOT --- #
# Generate the image where to save the plot
pdf(file = here("..",
"3_results",
"figures",
"presentations",
paste0(metaecosystem_type_selected,
"_ecosystems_0.pdf")),
width = presentation_figure_width_inches,
height = presentation_figure_height_inches)
# Prepare data for plotting
data_for_plotting_2 = data_for_plotting %>%
filter(ecosystem_type == "",
!is.na(!!sym(response_variable)))
# Create plot
p = data_for_plotting_2 %>%
ggplot(aes(x = day,
y = get(response_variable))) +
scale_x_continuous(breaks = sampling_days,
limits = c(x_min, x_max)) +
ylim(y_min, y_max) +
labs(x = axis_names$axis_name[axis_names$variable == "day"],
y = axis_names$axis_name[axis_names$variable == response_variable]) +
geom_vline(xintercept = resource_flow_days,
linetype = resource_flow_line_type,
color = resource_flow_line_colour,
linewidth = resource_flow_line_width) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = legend_position,
legend.key.width = unit(legend_width_cm, "cm"),
axis.title.x = element_text(size = presentation_labels_size),
axis.title.y = element_text(size = presentation_labels_size),
axis.text.x = element_text(size = presentation_labels_size),
axis.text.y = element_text(size = presentation_labels_size),
plot.margin = unit(c(ggarrange_margin_left,
ggarrange_margin_right,
ggarrange_margin_bottom,
ggarrange_margin_left),
"cm")) +
font("xlab", size = presentation_labels_size) +
font("ylab", size = presentation_labels_size)
# Show plot
p
# Give the plot a ggpubr format
ggpubr::ggarrange(p,
align = "v",
label.x = 0.1,
label.y = 0.8) %>%
print()
# Close the current graphics device to properly save the image
dev.off()
# --- CREATE PLOTS LINE BY LINE --- #
# Plot filled plots, adding to every plot a new line.
for(i in 1:length(ecosystem_type_selected)) {
# Prepare data for plotting
data_for_plotting_2 = data_for_plotting %>%
filter(ecosystem_type %in% ecosystem_type_selected[1:i],
!is.na(!!sym(response_variable))) %>%
summarySE(measurevar = response_variable,
groupvars = c("day", "ecosystem_type", "connection"))
# Create plot
p = data_for_plotting_2 %>%
ggplot(aes(x = day,
y = get(response_variable),
group = interaction(day, ecosystem_type, connection),
color = ecosystem_type,
linetype = connection)) +
# Points
geom_point(stat = "summary",
fun = "mean",
position = position_dodge(dodging),
size = presentation_treatment_points_size) +
geom_errorbar(aes(ymax = get(response_variable) + ci,
ymin = get(response_variable) - ci),
width = width_errorbar,
position = position_dodge(dodging)) +
# Lines
geom_line(stat = "summary",
fun = "mean",
aes(group = interaction(ecosystem_type, connection)),
position = position_dodge(dodging),
linewidth = presentation_treatment_linewidth) +
scale_linetype_manual(values = treatment_linetypes) +
# Axes & legend
labs(x = axis_names$axis_name[axis_names$variable == "day"],
y = axis_names$axis_name[axis_names$variable == response_variable],
color = "") +
guides(color = "none",
linetype = "none") +
xlim(x_min, x_max) +
ylim(y_min, y_max) +
scale_x_continuous(breaks = unique(data_for_plotting_2$day)) +
scale_color_manual(values = treatment_colours) +
guides(color = guide_legend(order = 1,
title = NULL),
linetype = guide_legend(order = 2,
title = NULL)) +
# Extra graphic elements
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "none",
legend.key.width = unit(legend_width_cm, "cm"),
axis.title.x = element_text(size = presentation_labels_size),
axis.title.y = element_text(size = presentation_labels_size),
axis.text.x = element_text(size = presentation_labels_size),
axis.text.y = element_text(size = presentation_labels_size)) +
geom_rect(xmin = grey_background_xmin,
xmax = grey_background_xmax,
ymin = grey_background_ymin,
ymax = grey_background_ymax,
fill = grey_background_fill,
alpha = grey_background_alpha,
color = grey_background_color) +
geom_vline(xintercept = resource_flow_days,
linetype = resource_flow_line_type,
color = resource_flow_line_colour,
linewidth = resource_flow_line_width)
# Give the plot a ggpubr format
p = ggpubr::ggarrange(p,
align = "v",
label.x = 0.1,
label.y = 0.8) %>%
print()
# Generate the image where to save the plot
pdf(file = here("..",
"3_results",
"figures",
"presentations",
paste0(metaecosystem_type_selected,
"_ecosystems_",
i,
".pdf")),
width = presentation_figure_width_inches,
height = presentation_figure_height_inches)
# Save image to the file
print(p)
# Close the current graphics device to properly save the image
dev.off()
}
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.1.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Zurich
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] conflicted_1.2.0 gllvm_2.0 TMB_1.9.15 SingleCaseES_0.7.3
## [5] e1071_1.7-16 bemovi_1.0 emmeans_1.10.5 combinat_0.0-8
## [9] Rmisc_1.5.1 betapart_1.6 vegan_2.6-8 lattice_0.22-6
## [13] permute_0.9-7 optimx_2023-10.21 glmmTMB_1.1.10 lmerTest_3.1-3
## [17] lme4_1.1-35.5 Matrix_1.7-0 ggforce_0.4.2 officer_0.6.7
## [21] broom_1.0.7 rempsyc_0.1.8 plotly_4.10.4 ggpubr_0.6.0
## [25] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [29] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [33] ggplot2_3.5.1 tidyverse_2.0.0 plyr_1.8.9 data.table_1.16.2
## [37] renv_1.0.11 testthat_3.2.1.1 here_1.0.1
##
## loaded via a namespace (and not attached):
## [1] circular_0.5-1 rstudioapi_0.17.1 jsonlite_1.8.9
## [4] magrittr_2.0.3 estimability_1.5.1 farver_2.1.2
## [7] nloptr_2.1.1 rmarkdown_2.29 ragg_1.3.3
## [10] vctrs_0.6.5 memoise_2.0.1 minqa_1.2.8
## [13] askpass_1.2.1 rstatix_0.7.2 itertools_0.1-3
## [16] htmltools_0.5.8.1 Formula_1.2-5 sass_0.4.9
## [19] pracma_2.4.4 bslib_0.8.0 desc_1.4.3
## [22] htmlwidgets_1.6.4 cachem_1.1.0 uuid_1.2-1
## [25] alabama_2023.1.0 lifecycle_1.0.4 minpack.lm_1.2-4
## [28] iterators_1.0.14 pkgconfig_2.0.3 R6_2.5.1
## [31] fastmap_1.2.0 rbibutils_2.3 magic_1.6-1
## [34] digest_0.6.37 numDeriv_2016.8-1.1 colorspace_2.1-1
## [37] rprojroot_2.0.4 pkgload_1.4.0 crosstalk_1.2.1
## [40] textshaping_0.4.0 labeling_0.4.3 fansi_1.0.6
## [43] timechange_0.3.0 httr_1.4.7 polyclip_1.10-7
## [46] abind_1.4-8 mgcv_1.9-1 compiler_4.4.1
## [49] proxy_0.4-27 withr_3.0.2 backports_1.5.0
## [52] carData_3.0-5 ggsignif_0.6.4 MASS_7.3-60.2
## [55] openssl_2.2.2 tools_4.4.1 ape_5.8
## [58] zip_2.3.1 glue_1.8.0 rcdd_1.6
## [61] nlme_3.1-164 grid_4.4.1 cluster_2.1.6
## [64] generics_0.1.3 snow_0.4-4 gtable_0.3.6
## [67] tzdb_0.4.0 class_7.3-22 hms_1.1.3
## [70] xml2_1.3.6 car_3.1-3 utf8_1.2.4
## [73] foreach_1.5.2 pillar_1.9.0 splines_4.4.1
## [76] tweenr_2.0.3 tidyselect_1.2.1 knitr_1.49
## [79] reformulas_0.4.0 xfun_0.49 brio_1.1.5
## [82] stringi_1.8.4 lazyeval_0.2.2 yaml_2.3.10
## [85] boot_1.3-30 evaluate_1.0.1 codetools_0.2-20
## [88] cli_3.6.3 xtable_1.8-4 geometry_0.5.0
## [91] systemfonts_1.1.0 Rdpack_2.6.2 munsell_0.5.1
## [94] jquerylib_0.1.4 Rcpp_1.0.13-1 doSNOW_1.0.20
## [97] parallel_4.4.1 picante_1.8.2 viridisLite_0.4.2
## [100] mvtnorm_1.3-2 scales_1.3.0 rlang_1.1.4
## [103] cowplot_1.1.3 fastmatch_1.1-4
## Time difference of 5.338679 mins
If you want to change a certain part of the code using the following code in Unix:
#Rmd script
cd /Users/ema/Documents/GitHub/AHSize/r_files; sed -i '' 's/old_string/new_string/g' *.Rmd
#R script
cd /Users/ema/Documents/GitHub/AHSize/r_files/functions; sed -i '' 's/old_string/new_string/g' *.R
If you want to share a dataset and get a reproducible object, use the following R code:
dput(data)
This are 20 random columns of ds_metaecosystems that you can use to input into chatGPT to give it an idea of what the structure of your data is.
ds_metaecosystems %>%
sample_n(20,
replace = FALSE) %>%
dput()
## structure(list(time_point = c(2L, 3L, 0L, 2L, 3L, 0L, 0L, 1L,
## 1L, 1L, 1L, 2L, 3L, 4L, 0L, 0L, 2L, 3L, 2L, 4L), day = c(12,
## 19, 0, 12, 19, 0, 0, 5, 5, 5, 5, 12, 19, 26, 0, 0, 12, 19, 12,
## 26), system_nr = c(37L, 1050L, 1035L, 1005L, 1053L, 1038L, 44L,
## 38L, 1039L, 40L, 37L, 41L, 37L, 1069L, 1025L, 1067L, 1006L, 1055L,
## 1042L, 1034L), ecosystems_combined = c("43|44", "30|5", "27|5",
## "6|25", "16|13", "28|3", "57|58", "45|46", "28|4", "49|50", "43|44",
## "51|52", "43|44", "19|14", "10|25", "19|12", "7|21", "16|15",
## "29|2", "27|4"), metaecosystem_type = structure(c(3L, 3L, 3L,
## 1L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 3L, 2L, 1L, 2L, 1L, 2L, 3L,
## 3L), levels = c("autotrophic-dominated", "equally-dominated",
## "heterotrophic-dominated"), class = "factor"), metaeco_type_n_connection = c("heterotrophic-dominated connected",
## "heterotrophic-dominated unconnected", "heterotrophic-dominated unconnected",
## "autotrophic-dominated unconnected", "equally-dominated unconnected",
## "heterotrophic-dominated unconnected", "autotrophic-dominated connected",
## "heterotrophic-dominated connected", "heterotrophic-dominated unconnected",
## "heterotrophic-dominated connected", "heterotrophic-dominated connected",
## "autotrophic-dominated connected", "heterotrophic-dominated connected",
## "equally-dominated unconnected", "autotrophic-dominated unconnected",
## "equally-dominated unconnected", "autotrophic-dominated unconnected",
## "equally-dominated unconnected", "heterotrophic-dominated unconnected",
## "heterotrophic-dominated unconnected"), connection = structure(c(2L,
## 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
## 1L, 1L, 1L), levels = c("unconnected", "connected"), class = "factor"),
## total_metaecosystem_bioarea_mm2 = c(137.545433398256, 147.213791659884,
## 188.796475098837, 580.518071515988, 258.33182222093, 188.796475098837,
## 122.615820680233, 390.365958401163, 532.270890209302, 229.267097646802,
## 268.922248927326, 506.860594953488, 86.4914219607558, 175.472257068314,
## 122.615820680233, 155.706147889535, 303.356856375, 300.362608609012,
## 281.900154872093, 293.350654295058)), row.names = c(NA, -20L
## ), class = "data.frame")
# Set parameters
resource_flow_ml = 5.25
small_ecos_ml = 7.5
medium_ecos_ml = 22.5
large_ecos_ml = 37.5
colour_autotorophs_dark = "#2ca25f"
colour_autotorophs_light = "#bbe1c1"
colour_heterotrophs_dark = "#3182bd"
colour_heterotrophs_light = "#ADD8E6"
# Create a function to plot the circles
generate_circle_plot <- function(eco_ml,
disturbed_percentage,
diameter,
colour_selected) {
# Select colour
if(colour_selected == "autotorophs_dark"){
colour_circles = colour_autotorophs_dark
}
if(colour_selected == "autotorophs_light"){
colour_circles = colour_autotorophs_light
}
if(colour_selected == "heterotrophs_dark"){
colour_circles = colour_heterotrophs_dark
}
if(colour_selected == "heterotrophs_light"){
colour_circles = colour_heterotrophs_light
}
# Calculate circle parameters
circle_area <- eco_ml # mm²
radius <- sqrt(circle_area / pi) # Calculate radius
# Target segment coverage (minor segment)
minor_target_coverage <- (100 - disturbed_percentage) / 100
# Function to calculate segment height for a given coverage
calculate_segment_height <- function(radius, target_coverage) {
# Define optimization function
optimize_height <- function(h) {
r <- radius
segment_area <- r*r * acos((r-h)/r) - (r-h) * sqrt(2*r*h - h*h)
abs(segment_area / (pi * r*r) - target_coverage)
}
# Use optimization to find the height
result <- optim(par = radius/2,
fn = optimize_height,
method = "Brent",
lower = 0,
upper = radius)
return(result$par)
}
# Calculate the segment height for the desired coverage
minor_segment_height <- calculate_segment_height(radius, minor_target_coverage)
# Angle corresponding to the minor segment's height
theta_minor <- acos((radius - minor_segment_height) / radius)
# Create points for the circle and the chord
n_points <- 200
angle_seq <- seq(0, 2*pi, length.out = n_points) # Full circle
# Circle data
circle_data <- data.frame(x = radius * cos(angle_seq),
y = radius * sin(angle_seq))
# Chord endpoints (start and end of the minor segment)
chord_x <- c(radius * cos(theta_minor), radius * cos(-theta_minor))
chord_y <- c(radius * sin(theta_minor), radius * sin(-theta_minor))
# Plot
plot_to_save <- ggplot() +
# Outer full circle
geom_polygon(data = circle_data,
aes(x = x,
y = y),
fill = colour_circles,
color = "black") +
# Chord that divides the circle
geom_segment(aes(x = chord_x[1],
y = chord_y[1],
xend = chord_x[2],
yend = chord_y[2]),
color = "red",
size = 0.5) +
coord_fixed() +
theme_void()
# Save the plot
ggsave(here("..",
"3_results",
"figures",
"other_graphic_elements",
paste0(colour_selected,
"_",
eco_ml,
"_ml.png")),
plot = plot_to_save,
width = diameter,
height = diameter,
dpi = 300)
}
# Autotrophic ecosystems with dark colour
colour_selected = "autotorophs_dark"
generate_circle_plot(eco_ml = small_ecos_ml,
disturbed_percentage = (resource_flow_ml/ small_ecos_ml) * 100,
diameter = 3.088,
colour_selected = colour_selected)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
generate_circle_plot(eco_ml = medium_ecos_ml,
disturbed_percentage = 100 - (resource_flow_ml/ medium_ecos_ml) * 100,
diameter = 5.360,
colour_selected = colour_selected)
generate_circle_plot(eco_ml = large_ecos_ml,
disturbed_percentage = 100 - (resource_flow_ml/ large_ecos_ml) * 100,
diameter = 6.920,
colour_selected = colour_selected)
# Autotrophic ecosystems with light colour
colour_selected = "autotorophs_light"
generate_circle_plot(eco_ml = small_ecos_ml,
disturbed_percentage = (resource_flow_ml/ small_ecos_ml) * 100,
diameter = 3.088,
colour_selected = colour_selected)
generate_circle_plot(eco_ml = medium_ecos_ml,
disturbed_percentage = 100 - (resource_flow_ml/ medium_ecos_ml) * 100,
diameter = 5.360,
colour_selected = colour_selected)
generate_circle_plot(eco_ml = large_ecos_ml,
disturbed_percentage = 100 - (resource_flow_ml/ large_ecos_ml) * 100,
diameter = 6.920,
colour_selected = colour_selected)
# Heterotrophic ecosystems with dark colour
colour_selected = "heterotrophs_dark"
generate_circle_plot(eco_ml = small_ecos_ml,
disturbed_percentage = (resource_flow_ml/ small_ecos_ml) * 100,
diameter = 3.088,
colour_selected = colour_selected)
generate_circle_plot(eco_ml = medium_ecos_ml,
disturbed_percentage = 100 - (resource_flow_ml/ medium_ecos_ml) * 100,
diameter = 5.360,
colour_selected = colour_selected)
generate_circle_plot(eco_ml = large_ecos_ml,
disturbed_percentage = 100 - (resource_flow_ml/ large_ecos_ml) * 100,
diameter = 6.920,
colour_selected = colour_selected)
# Autotrophic ecosystems with light colour
colour_selected = "heterotrophs_light"
generate_circle_plot(eco_ml = small_ecos_ml,
disturbed_percentage = (resource_flow_ml/ small_ecos_ml) * 100,
diameter = 3.088,
colour_selected = colour_selected)
generate_circle_plot(eco_ml = medium_ecos_ml,
disturbed_percentage = 100 - (resource_flow_ml/ medium_ecos_ml) * 100,
diameter = 5.360,
colour_selected = colour_selected)
generate_circle_plot(eco_ml = large_ecos_ml,
disturbed_percentage = 100 - (resource_flow_ml/ large_ecos_ml) * 100,
diameter = 6.920,
colour_selected = colour_selected)